Skip to content

Instantly share code, notes, and snippets.

@tinogis
Last active October 17, 2015 17:02
Show Gist options
  • Save tinogis/a54ee117d4d5d2b267a3 to your computer and use it in GitHub Desktop.
Save tinogis/a54ee117d4d5d2b267a3 to your computer and use it in GitHub Desktop.
Parses the REE file 117_Distribuidores.pdf and creates a CSV with the extracted information. It can also generate SQL for the GISCE-ERP ref2 update. It may also be useful for the 117_Comercializadores.pdf file.
# -*- coding: utf-8 -*-
# pdftotext 117_Distribuidores.pdf creates 117_Distribuidores.txt
# pdftotext 117_Comercializadores.pdf creates 117_Comercializadores.txt
import csv
import sys
import re
# ---------------------------------------------------------------------------
# Configuration: edit these values before running the script.
# ---------------------------------------------------------------------------
# Which list to parse: 'distri' selects 117_Distribuidores.txt, any other
# value selects 117_Comercializadores.txt.
agent = 'comer'
#agent = 'distri'
# When True, records whose first field contains neither '0' nor '1' are
# skipped (presumably header/noise chunks -- confirm against real input).
numbertest = True
# When True, emit SQL UPDATE statements instead of ';'-joined rows.
assql = False

if agent == 'distri':
    filename = '117_Distribuidores.txt'
else:
    filename = '117_Comercializadores.txt'
    # SQL output is always switched off for this file.
    assql = False

# matches i.e '1 de 11'
num_pag = re.compile(r'[0-9]+ de [0-9]+')

# Template for the SQL output mode: {0} is the new ref2, {1} the partner ref.
sql = "UPDATE res_partner SET ref2='{0}' WHERE ref='{1}';"

# Rows longer than this are cut in two (seen on the 'comer' file).
MAX_ROW_WIDTH = 70
# Text that starts the second field when two fields share one row.
BAJA_MARKER = 'Baja a'
# Number of consecutive lines that make up one record.
FIELDS_PER_AGENT = 5


def split_row(text):
    """Return the list of field lines contained in one raw text row.

    * A row longer than ``MAX_ROW_WIDTH`` characters is cut in two pieces.
    * Otherwise, a row containing ``'Baja a'`` is cut right before that
      marker, so the marker starts the second piece.
    * Any other row is returned unchanged as a single-item list.
    """
    if len(text) > MAX_ROW_WIDTH:
        # NOTE(review): the two pieces overlap by one character (index 69
        # appears in both). Kept as in the original script -- confirm
        # whether the second piece should start at MAX_ROW_WIDTH instead.
        return [text[:MAX_ROW_WIDTH], text[MAX_ROW_WIDTH - 1:]]
    marker_at = text.find(BAJA_MARKER)
    if marker_at != -1:
        # Cut on the marker itself rather than on the first 'B' of the
        # row, so names containing a capital 'B' are not mangled and text
        # after the marker is not dropped.
        return [text[:marker_at], text[marker_at:]]
    return [text]


def collect_lines(rows):
    """Flatten parsed rows into the list of field lines.

    ``rows`` is an iterable of lists of strings (as produced by
    ``csv.reader``); only the first column of each row is used and empty
    rows are ignored.  A row matching the page-number pattern is dropped
    together with the line collected just before it.
    """
    collected = []
    for row in rows:
        if not row:
            continue
        first = row[0]
        if num_pag.search(first):
            # Guard the pop: a page marker may appear before any line has
            # been collected.
            if collected:
                collected.pop()
            continue
        collected.extend(split_row(first))
    return collected


def group_agents(lines, size=FIELDS_PER_AGENT):
    """Chunk ``lines`` into consecutive records of ``size`` items.

    The last record is shorter when ``len(lines)`` is not a multiple of
    ``size``.
    """
    return [lines[start:start + size] for start in range(0, len(lines), size)]


def render_agents(records, as_sql=False, number_test=True):
    """Yield one output line (without trailing newline) per kept record.

    With ``number_test`` enabled, records whose first field contains
    neither '0' nor '1' are skipped.  In SQL mode the third field becomes
    ``ref2`` and the first field is matched against ``ref``; records with
    fewer than three fields cannot be rendered and are skipped.  Otherwise
    the fields are joined with ';'.
    """
    for fields in records:
        if number_test and '0' not in fields[0] and '1' not in fields[0]:
            continue
        if not as_sql:
            yield ';'.join(fields)
        elif len(fields) >= 3:
            # Double single quotes so a quote in the data cannot terminate
            # the SQL string literal early.
            ref2 = fields[2].replace("'", "''")
            ref = fields[0].replace("'", "''")
            yield sql.format(ref2, ref)


def main():
    """Parse the configured text file and write the result to stdout."""
    with open(filename, 'r') as fitxer:
        lines = collect_lines(csv.reader(fitxer, delimiter='|'))
    for out_line in render_agents(group_agents(lines), assql, numbertest):
        sys.stdout.write(out_line)
        sys.stdout.write("\n")
    sys.stdout.flush()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment