Skip to content

Instantly share code, notes, and snippets.

@tallpeak
Created September 26, 2020 01:58
Show Gist options
  • Save tallpeak/8df7e1a63892e159e7aa741cda53b217 to your computer and use it in GitHub Desktop.
Save tallpeak/8df7e1a63892e159e7aa741cda53b217 to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
import gzip
import glob
import psycopg2
import pathlib
import time
import os
pgpass = open(os.path.expanduser("~/secrets/pgpass.txt"), "r").read()
files = []
p = os.path.expanduser("~/") # = path; add Downloads/ if necessary
for f in ("XM*.gz","SFA8.gz"):
for g in glob.glob(p + f):
files.append(g)
filename = sorted(files, key=os.path.getmtime)[-1] # latest
print("importing %s" % filename)
fldpos = []
sfa8fmt = """8.0
35
1 SQLCHAR 0 2 "" 1 action Latin1_General_CI_AI
2 SQLCHAR 0 8 "" 2 x_change_date ""
3 SQLCHAR 0 6 "" 3 x_change_time ""
4 SQLCHAR 0 5 "" 4 label_number ""
5 SQLCHAR 0 7 "" 5 catalog_number ""
6 SQLCHAR 0 1 "" 6 config_number ""
7 SQLCHAR 0 30 "" 7 artist_name Latin1_General_CI_AI
8 SQLCHAR 0 30 "" 8 title Latin1_General_CI_AI
9 SQLCHAR 0 30 "" 9 desc_1 Latin1_General_CI_AI
10 SQLCHAR 0 30 "" 10 desc_2 Latin1_General_CI_AI
11 SQLCHAR 0 13 "" 11 upc_number Latin1_General_CI_AI
12 SQLCHAR 0 5 "" 12 upc_extension ""
13 SQLCHAR 0 4 "" 13 label_abbr Latin1_General_CI_AI
14 SQLCHAR 0 30 "" 14 label_name Latin1_General_CI_AI
15 SQLCHAR 0 10 "" 15 dist_sup_number ""
16 SQLCHAR 0 9 "" 16 SRP ""
17 SQLCHAR 0 9 "" 17 cost ""
18 SQLCHAR 0 4 "" 18 x_supplier_price_code Latin1_General_CI_AI
19 SQLCHAR 0 4 "" 19 x_supplier_prefix Latin1_General_CI_AI
20 SQLCHAR 0 8 "" 20 street_date ""
21 SQLCHAR 0 8 "" 21 x_receipt_date ""
22 SQLCHAR 0 8 "" 22 discontinue_date ""
23 SQLCHAR 0 8 "" 23 x_last_return ""
24 SQLCHAR 0 8 "" 24 x_last_order ""
25 SQLCHAR 0 5 "" 25 music_category ""
26 SQLCHAR 0 3 "" 26 x_priority ""
27 SQLCHAR 0 3 "" 27 x_config_ext ""
28 SQLCHAR 0 5 "" 28 x_general_category ""
29 SQLCHAR 0 12 "" 29 link_id ""
30 SQLCHAR 0 9 "" 30 x_curr_purch_price ""
31 SQLCHAR 0 11 "" 31 x_sales ""
32 SQLCHAR 0 3 "" 32 x_full_case_pack ""
33 SQLCHAR 0 17 "" 33 sfa8_id ""
34 SQLCHAR 0 5 "" 34 group_category ""
35 SQLCHAR 0 15 "\\r\\n" 35 x_cat_num Latin1_General_CI_AI
"""
fld = 0
p = 0
#with open("sfa8.fmt","r") as rdr:
for ln in sfa8fmt.split("\n"):
row = str.split(ln.rstrip())
if len(row) > 6:
l = int(row[3])
fldpos.append((p,l,row[6]))
p = p + l
fld = fld + 1
print(fldpos)
con = psycopg2.connect(database="streetpulse", user="postgres",
password=pgpass, host="qomph.com", port=5432)
cur = con.cursor()
cur.execute("""DROP TABLE IF EXISTS stg_superfile;
CREATE TABLE stg_superfile (
action text NOT NULL,
x_change_date text NOT NULL,
x_change_time text NOT NULL,
label_number text NOT NULL,
catalog_number text NOT NULL,
config_number text NOT NULL,
artist_name text NOT NULL,
title text NOT NULL,
desc_1 text NOT NULL,
desc_2 text NOT NULL,
upc_number text NOT NULL,
upc_extension integer NOT NULL,
label_abbr text NOT NULL,
label_name text NOT NULL,
dist_sup_number text,
srp text NOT NULL,
cost text NOT NULL,
x_supplier_price_code text NOT NULL,
x_supplier_prefix text NOT NULL,
street_date text NOT NULL,
x_receipt_date text NOT NULL,
discontinue_date text NOT NULL,
x_last_return text NOT NULL,
x_last_order text NOT NULL,
music_category text NOT NULL,
x_priority text NOT NULL,
x_config_ext text NOT NULL,
x_general_category text NOT NULL,
link_id text NOT NULL,
x_curr_purch_price text NOT NULL,
x_sales text NOT NULL,
x_full_case_pack text NOT NULL,
sfa8_id text,
group_category text NOT NULL,
x_cat_num text
);
ALTER TABLE stg_superfile OWNER TO postgres;""")
#cur.execute("truncate table stg_superfile")
rowcount = 0
starttime = time.time()
with gzip.GzipFile(filename, "rb") as f:
for ln in f:
row = []
for (p,l,fldnam) in fldpos:
try:
fld = ln[p:p+l].decode("cp437") #not cp1250
except Exception as inst:
print(type(inst)) # the exception instance
print(inst.args) # arguments stored in .args
print(inst) # __str__ allows args to be printed directly,
row.append(fld.rstrip())
#print(fldnam,"\t",fld)
cur.execute("""insert into stg_superfile values(
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s)""", row) # 35 fields
rowcount += 1
if (rowcount % 1000) == 0 :
print("%7d %s" % (rowcount, time.asctime()))
seconds = time.time() - starttime
con.commit()
print("imported %d rows in %.3f seconds (%.3f records/sec)"
% (rowcount, seconds, rowcount/seconds) )
# update tbl_superfile from stg_superfile
starttime = time.time()
cur = con.cursor()
cur.execute("select * from import_superfile();")
cur.execute("call superfile_post_import();")
con.commit()
seconds = time.time() - starttime
print("Superfile updates took %.3f seconds" % seconds)
con.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment