Skip to content

Instantly share code, notes, and snippets.

@sergiolucero
Created September 11, 2019 15:15
Show Gist options
  • Save sergiolucero/86bb15b271bb76adc81ae687122f5982 to your computer and use it in GitHub Desktop.
Save sergiolucero/86bb15b271bb76adc81ae687122f5982 to your computer and use it in GitHub Desktop.
scraper congreso
import pandas as pd
import xml.etree.ElementTree as ET
import requests, sys
def datos(id):
    """Fetch one bulletin from the senado.cl public web service.

    Args:
        id: Bulletin number, sent as the ``boletin`` query parameter.
            (The name shadows the builtin but is kept for compatibility
            with existing callers.)

    Returns:
        A four-item list: the text of the children at positions 2, 4 and 7
        of ``root[0][0]``, followed by a list holding the first-child text
        of every element under ``root[0][1]``. The caller in this file
        unpacks these as date, chamber, committee and authors -- that
        reading comes from the caller's variable names, not from the
        service's schema. An empty list is returned when the response
        root has no child elements.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
        xml.etree.ElementTree.ParseError: when the body is not valid XML.
        IndexError: when the document lacks the expected positions.
    """
    url = f'https://www.senado.cl/wspublico/tramitacion.php?boletin={id}'
    # Without a timeout a single stalled connection blocks the whole run.
    respuesta = requests.get(url, timeout=30)
    respuesta.raise_for_status()
    # Parse the raw bytes so the XML parser applies the encoding declared
    # in the document itself rather than the charset requests guesses for
    # ``.text``, which can corrupt accented characters.
    root = ET.fromstring(respuesta.content)

    # len() rather than truthiness: truth-testing an Element is deprecated.
    if len(root):
        descripcion = root[0][0]
        pdatos = [descripcion[ix].text for ix in (2, 4, 7)]
        pdatos.append([autor[0].text for autor in root[0][1]])
    else:
        pdatos = []

    # Lightweight progress trace: one line every 20 bulletins.
    if id % 20 == 10:
        print(id, pdatos)
    return pdatos
def formatear_fecha(fecha):
    """Reorder a date string into year/month/day form.

    The input is assumed to look like ``dd/mm/yyyy``: the first two
    characters, characters 3-4 and the last four are recombined as
    ``yyyy/mm/dd``. The year-first form sorts chronologically as plain
    text, so min/max over the resulting column are meaningful.
    """
    return fecha[-4:] + '/' + fecha[3:5] + '/' + fecha[:2]


def main(argv):
    """Scrape bulletins in ``[START, STOP)`` and write ``resumen.html``.

    Args:
        argv: Full argument vector (program name, START, STOP).

    Raises:
        SystemExit: when the arguments are missing or not integers.
    """
    uso = f'usage: {argv[0] if argv else "scraper"} START STOP'
    if len(argv) != 3:
        raise SystemExit(uso)
    sta, sto = argv[1:]
    print(sta, sto)
    try:
        inicio, fin = int(sta), int(sto)
    except ValueError:
        raise SystemExit(uso) from None

    filas = []
    for boletin in range(inicio, fin):
        try:
            fecha, camara, comision, autores = datos(boletin)
            fecha = formatear_fecha(fecha)
        except Exception:
            # Deliberately broad: an empty result fails to unpack, a missing
            # field is None and cannot be sliced, and the fetch itself may
            # fail. Not a bare ``except`` so Ctrl-C still stops the run.
            print('No DATA:', boletin)
        else:
            filas.append((boletin, fecha, camara, comision, autores))

    df = pd.DataFrame(
        filas, columns=['id', 'fecha', 'camara', 'comision', 'autores'])
    # String aggregation names: passing the builtins min/max is deprecated
    # in recent pandas and yields the same 'min'/'max' column labels.
    gdf = df.groupby(['camara', 'comision']).agg(
        {'id': ['min', 'max'], 'fecha': ['min', 'max']})
    gdf.to_html('resumen.html')


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment