Created
January 7, 2019 16:23
-
-
Save elvisgs/f43704b2ac16a9cb1f93c148567b6ac8 to your computer and use it in GitHub Desktop.
Script para dividir CSV de publicações do SIJ por edição e salvar como JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import itertools
import json
import os
import operator
import re
import sys

# Directory that holds the input CSV.
cwd = '/home/elvis/playground/stm'

# Lift the csv module's per-field size cap to the largest value accepted here.
# NOTE(review): presumably needed because some publication texts exceed the
# module's default limit -- confirm against the real data.
csv.field_size_limit(sys.maxsize)

# Full path of the CSV file that gets split.
path_csv = os.path.join(cwd, 'publicacoes-stm.csv')
def divide_publicacoes():
    """Split the publications CSV into one JSON file per journal edition.

    Reads ``path_csv`` with ``csv.DictReader``, groups consecutive rows that
    share the same ``nome_jornal_edicao`` value, renames the keys of each row
    with ``renomear_chaves`` and writes every group as an indented JSON array.
    The output file sits next to the CSV and is named
    ``<csv name without extension>_<date>_<edition number>.json``. A one-line
    summary is printed for each edition.

    Raises:
        OSError: If the CSV cannot be opened or a JSON file cannot be written.
        KeyError: If a row lacks ``nome_jornal_edicao`` or one of the columns
            renamed by ``renomear_chaves``.
    """
    # Output files share the CSV's path minus its extension. splitext only
    # touches the final suffix, unlike str.replace('.csv', ...), which would
    # also rewrite a '.csv' occurring earlier in the path.
    base_path, _ = os.path.splitext(path_csv)

    # newline='' is what the csv module requires so that line breaks embedded
    # in quoted fields are handled by the reader itself.
    with open(path_csv, 'r', newline='') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        # groupby only merges *adjacent* rows: the rows of one edition must be
        # contiguous in the CSV. A non-contiguous edition would produce
        # several groups mapping to the same output path, each one
        # overwriting the previous file.
        grouped = itertools.groupby(
            csv_reader, operator.itemgetter('nome_jornal_edicao'))
        for group, items in grouped:
            # Materialise the group now; groupby invalidates it as soon as
            # the outer iterator advances.
            items = list(map(renomear_chaves, items))
            data, nro_edicao = obtem_data_e_numero_edicao(group)
            print(f'{data} ({nro_edicao}): {len(items)} publicações')
            json_content = json.dumps(items, indent=2, ensure_ascii=False)
            json_path = f'{base_path}_{data}_{nro_edicao}.json'
            with open(json_path, 'w') as json_file:
                json_file.write(json_content)
def obtem_data_e_numero_edicao(jornal):
    """Extract the date and the edition number from an edition label.

    Args:
        jornal: Edition label containing a ``dd/mm/yyyy Nº <digits>``
            fragment anywhere in the text.

    Returns:
        A ``(date, number)`` tuple of strings: the date with its three
        components reversed and joined by dashes (``yyyy-mm-dd``), and the
        digits that follow ``Nº``.

    Raises:
        ValueError: If the label contains no such fragment.
    """
    match = re.search(r'(\d{2}/\d{2}/\d{4}) Nº (\d+)', jornal)
    if match is None:
        # Without this guard the failure would surface as an opaque
        # "'NoneType' object is not subscriptable" error.
        raise ValueError(
            f'edition label has no "dd/mm/yyyy Nº <number>" part: {jornal!r}')
    # dd/mm/yyyy -> yyyy-mm-dd
    return '-'.join(reversed(match[1].split('/'))), match[2]
def renomear_chaves(publicacao):
    """Rename three CSV column keys of a publication row, in place.

    ``cod_publicacao`` becomes ``codigo``, ``texto_processo`` becomes
    ``textoprocesso`` and ``texto_despacho`` becomes ``textodespacho``.
    All other keys are left untouched.

    Args:
        publicacao: Mutable mapping for one row; it is modified directly.

    Returns:
        The same mapping object that was passed in.

    Raises:
        KeyError: If one of the three source keys is missing.
    """
    # (old key, new key) pairs, applied in this order so that the renamed
    # keys end up appended in the order codigo, textoprocesso, textodespacho.
    renames = (
        ('cod_publicacao', 'codigo'),
        ('texto_processo', 'textoprocesso'),
        ('texto_despacho', 'textodespacho'),
    )
    for old_key, new_key in renames:
        publicacao[new_key] = publicacao.pop(old_key)
    return publicacao
# Run the split only when executed as a script, not when imported.
if __name__ == '__main__':
    divide_publicacoes()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment