Created
September 26, 2020 01:58
-
-
Save tallpeak/8df7e1a63892e159e7aa741cda53b217 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import gzip | |
import glob | |
import psycopg2 | |
import pathlib | |
import time | |
import os | |
pgpass = open(os.path.expanduser("~/secrets/pgpass.txt"), "r").read() | |
files = [] | |
p = os.path.expanduser("~/") # = path; add Downloads/ if necessary | |
for f in ("XM*.gz","SFA8.gz"): | |
for g in glob.glob(p + f): | |
files.append(g) | |
filename = sorted(files, key=os.path.getmtime)[-1] # latest | |
print("importing %s" % filename) | |
fldpos = [] | |
sfa8fmt = """8.0 | |
35 | |
1 SQLCHAR 0 2 "" 1 action Latin1_General_CI_AI | |
2 SQLCHAR 0 8 "" 2 x_change_date "" | |
3 SQLCHAR 0 6 "" 3 x_change_time "" | |
4 SQLCHAR 0 5 "" 4 label_number "" | |
5 SQLCHAR 0 7 "" 5 catalog_number "" | |
6 SQLCHAR 0 1 "" 6 config_number "" | |
7 SQLCHAR 0 30 "" 7 artist_name Latin1_General_CI_AI | |
8 SQLCHAR 0 30 "" 8 title Latin1_General_CI_AI | |
9 SQLCHAR 0 30 "" 9 desc_1 Latin1_General_CI_AI | |
10 SQLCHAR 0 30 "" 10 desc_2 Latin1_General_CI_AI | |
11 SQLCHAR 0 13 "" 11 upc_number Latin1_General_CI_AI | |
12 SQLCHAR 0 5 "" 12 upc_extension "" | |
13 SQLCHAR 0 4 "" 13 label_abbr Latin1_General_CI_AI | |
14 SQLCHAR 0 30 "" 14 label_name Latin1_General_CI_AI | |
15 SQLCHAR 0 10 "" 15 dist_sup_number "" | |
16 SQLCHAR 0 9 "" 16 SRP "" | |
17 SQLCHAR 0 9 "" 17 cost "" | |
18 SQLCHAR 0 4 "" 18 x_supplier_price_code Latin1_General_CI_AI | |
19 SQLCHAR 0 4 "" 19 x_supplier_prefix Latin1_General_CI_AI | |
20 SQLCHAR 0 8 "" 20 street_date "" | |
21 SQLCHAR 0 8 "" 21 x_receipt_date "" | |
22 SQLCHAR 0 8 "" 22 discontinue_date "" | |
23 SQLCHAR 0 8 "" 23 x_last_return "" | |
24 SQLCHAR 0 8 "" 24 x_last_order "" | |
25 SQLCHAR 0 5 "" 25 music_category "" | |
26 SQLCHAR 0 3 "" 26 x_priority "" | |
27 SQLCHAR 0 3 "" 27 x_config_ext "" | |
28 SQLCHAR 0 5 "" 28 x_general_category "" | |
29 SQLCHAR 0 12 "" 29 link_id "" | |
30 SQLCHAR 0 9 "" 30 x_curr_purch_price "" | |
31 SQLCHAR 0 11 "" 31 x_sales "" | |
32 SQLCHAR 0 3 "" 32 x_full_case_pack "" | |
33 SQLCHAR 0 17 "" 33 sfa8_id "" | |
34 SQLCHAR 0 5 "" 34 group_category "" | |
35 SQLCHAR 0 15 "\\r\\n" 35 x_cat_num Latin1_General_CI_AI | |
""" | |
fld = 0 | |
p = 0 | |
#with open("sfa8.fmt","r") as rdr: | |
for ln in sfa8fmt.split("\n"): | |
row = str.split(ln.rstrip()) | |
if len(row) > 6: | |
l = int(row[3]) | |
fldpos.append((p,l,row[6])) | |
p = p + l | |
fld = fld + 1 | |
print(fldpos) | |
con = psycopg2.connect(database="streetpulse", user="postgres", | |
password=pgpass, host="qomph.com", port=5432) | |
cur = con.cursor() | |
cur.execute("""DROP TABLE IF EXISTS stg_superfile; | |
CREATE TABLE stg_superfile ( | |
action text NOT NULL, | |
x_change_date text NOT NULL, | |
x_change_time text NOT NULL, | |
label_number text NOT NULL, | |
catalog_number text NOT NULL, | |
config_number text NOT NULL, | |
artist_name text NOT NULL, | |
title text NOT NULL, | |
desc_1 text NOT NULL, | |
desc_2 text NOT NULL, | |
upc_number text NOT NULL, | |
upc_extension integer NOT NULL, | |
label_abbr text NOT NULL, | |
label_name text NOT NULL, | |
dist_sup_number text, | |
srp text NOT NULL, | |
cost text NOT NULL, | |
x_supplier_price_code text NOT NULL, | |
x_supplier_prefix text NOT NULL, | |
street_date text NOT NULL, | |
x_receipt_date text NOT NULL, | |
discontinue_date text NOT NULL, | |
x_last_return text NOT NULL, | |
x_last_order text NOT NULL, | |
music_category text NOT NULL, | |
x_priority text NOT NULL, | |
x_config_ext text NOT NULL, | |
x_general_category text NOT NULL, | |
link_id text NOT NULL, | |
x_curr_purch_price text NOT NULL, | |
x_sales text NOT NULL, | |
x_full_case_pack text NOT NULL, | |
sfa8_id text, | |
group_category text NOT NULL, | |
x_cat_num text | |
); | |
ALTER TABLE stg_superfile OWNER TO postgres;""") | |
#cur.execute("truncate table stg_superfile") | |
rowcount = 0 | |
starttime = time.time() | |
with gzip.GzipFile(filename, "rb") as f: | |
for ln in f: | |
row = [] | |
for (p,l,fldnam) in fldpos: | |
try: | |
fld = ln[p:p+l].decode("cp437") #not cp1250 | |
except Exception as inst: | |
print(type(inst)) # the exception instance | |
print(inst.args) # arguments stored in .args | |
print(inst) # __str__ allows args to be printed directly, | |
row.append(fld.rstrip()) | |
#print(fldnam,"\t",fld) | |
cur.execute("""insert into stg_superfile values( | |
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, | |
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, | |
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, | |
%s, %s, %s, %s, %s)""", row) # 35 fields | |
rowcount += 1 | |
if (rowcount % 1000) == 0 : | |
print("%7d %s" % (rowcount, time.asctime())) | |
seconds = time.time() - starttime | |
con.commit() | |
print("imported %d rows in %.3f seconds (%.3f records/sec)" | |
% (rowcount, seconds, rowcount/seconds) ) | |
# update tbl_superfile from stg_superfile | |
starttime = time.time() | |
cur = con.cursor() | |
cur.execute("select * from import_superfile();") | |
cur.execute("call superfile_post_import();") | |
con.commit() | |
seconds = time.time() - starttime | |
print("Superfile updates took %.3f seconds" % seconds) | |
con.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment