Last active
October 17, 2015 17:02
-
-
Save tinogis/a54ee117d4d5d2b267a3 to your computer and use it in GitHub Desktop.
Parses the REE file 117_Distribuidores.pdf and creates a CSV with the extracted information. It can also generate SQL for a GISCE-ERP ref2 update. It may also be useful for the 117_Comercializadores.pdf file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# pdftotext 117_Distribuidores.pdf creates 117_Distribuidores.txt | |
# pdftotext 117_Comercializadores.pdf creates 117_Comercializadores.txt | |
import csv | |
import sys | |
import re | |
# --- Settings ---------------------------------------------------------------
# Which REE listing to parse: 'comer' (comercializadores) or 'distri'
# (distribuidores).
agent = 'comer'
# When True, records whose first field contains neither '0' nor '1' are
# skipped.
numbertest = True
# When True, emit SQL UPDATE statements instead of ';'-separated rows.
assql = False
if agent == 'distri':
    filename = '117_Distribuidores.txt'
else:
    filename = '117_Comercializadores.txt'
    # The comercializadores listing is always written as CSV.
    assql = False

# Matches page footers, e.g. '1 de 11'.
num_pag = re.compile(r'[0-9]+ de [0-9]+')
# {0} receives the third field of a record (new ref2), {1} the first (ref).
sql = "UPDATE res_partner SET ref2='{0}' WHERE ref='{1}';"

# Number of consecutive text lines that make up one record.
FIELDS_PER_AGENT = 5
# Lines longer than this are treated as two fields glued together.
MAX_LINE_LENGTH = 70


def split_line(text):
    """Return the logical field(s) contained in one line of pdftotext output.

    A line longer than MAX_LINE_LENGTH is cut in two; a line containing
    'Baja a' is cut right before that marker; any other line is returned
    unchanged as a single-element list.
    """
    if len(text) > MAX_LINE_LENGTH:
        # NOTE(review): the two slices overlap by one character (index 69
        # ends up in both halves). Kept exactly as the original did it --
        # confirm against a real input whether [70:] was intended.
        return [text[:MAX_LINE_LENGTH], text[MAX_LINE_LENGTH - 1:]]
    if 'Baja a' in text:
        # Cut at the marker itself. Splitting on the first 'B' (as before)
        # broke any line with a capital 'B' ahead of 'Baja a' and dropped
        # whatever followed a later 'B'.
        head, marker, tail = text.partition('Baja a')
        return [head, marker + tail]
    return [text]


def extract_lines(source):
    """Collect the useful lines from *source*.

    *source* is any iterable of text lines (for instance an open file). Each
    line is read through ``csv.reader`` with '|' as delimiter and only its
    first column is used. Empty lines are ignored. A page-number footer is
    dropped together with the line collected just before it.
    """
    collected = []
    for row in csv.reader(source, delimiter='|'):
        if not row:
            continue
        text = row[0]
        if num_pag.search(text):
            # Guarded: a footer can appear before anything was collected.
            if collected:
                collected.pop()
            continue
        collected.extend(split_line(text))
    return collected


def group_agents(lines, size=FIELDS_PER_AGENT):
    """Chunk *lines* into consecutive records of *size* fields.

    The last record is shorter when len(lines) is not a multiple of *size*.
    """
    return [lines[start:start + size] for start in range(0, len(lines), size)]


def has_code(fields):
    """Return True when the first field contains a '0' or a '1'.

    NOTE(review): only these two digits are checked, as in the original;
    presumably this is a cheap "looks like a code" filter -- confirm before
    widening it to every digit.
    """
    return '0' in fields[0] or '1' in fields[0]


def sql_quote(value):
    """Escape *value* for use inside a single-quoted SQL string literal.

    Single quotes are doubled. NOTE(review): this assumes the target
    database does not treat backslashes as escapes inside plain string
    literals -- confirm for the server in use.
    """
    return value.replace("'", "''")


def format_agent(fields, as_sql=False):
    """Render one record as a ';'-joined row or as a SQL UPDATE statement.

    In SQL mode *fields* needs at least three entries, otherwise IndexError
    is raised.
    """
    if not as_sql:
        return ';'.join(fields)
    return sql.format(sql_quote(fields[2]), sql_quote(fields[0]))


def main():
    """Parse the configured text file and write the records to stdout."""
    with open(filename, 'r') as fitxer:
        lines = extract_lines(fitxer)
    # 'fields' rather than 'agent', so the module-level setting is not
    # overwritten while looping.
    for fields in group_agents(lines):
        if numbertest and not has_code(fields):
            continue
        if assql and len(fields) < 3:
            # Incomplete trailing record: no ref2 value to write.
            sys.stderr.write(
                'Skipping incomplete record: {0}\n'.format(';'.join(fields)))
            continue
        sys.stdout.write(format_agent(fields, as_sql=assql))
        sys.stdout.write("\n")
    sys.stdout.flush()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment