Skip to content

Instantly share code, notes, and snippets.

View sergiolucero's full-sized avatar
💭
coding the days away

Sergio Lucero sergiolucero

💭
coding the days away
View GitHub Profile
@sergiolucero
sergiolucero / scraper_denuncias_CNTV.py
Last active July 17, 2019 05:05
Scraping CNTV 1/2
import requests, pandas as pd
from bs4 import BeautifulSoup
# Print full cell contents instead of truncating long strings.  ``None`` is
# the supported "no limit" value; the legacy ``-1`` sentinel was deprecated in
# pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

base_url = 'https://www.cntv.cl/cntv/site/tax/port/all/taxport_16___1.html'

# A timeout keeps the script from hanging forever if the site stops answering,
# and raise_for_status() avoids silently parsing an HTTP error page as if it
# were the index (which would just report "0 meses").
response = requests.get(base_url, timeout=30)
response.raise_for_status()
bs = BeautifulSoup(response.text, 'lxml')

top = pd.DataFrame()  # starts empty; not used within the lines visible here

# Keep only the anchors whose text contains 'más'; the message below counts
# each of them as one month of complaints.
links = [link for link in bs.find_all('a') if 'más' in link.text]
print(f'{len(links)} meses de denuncias')
@sergiolucero
sergiolucero / arrow_BLC.py
Created July 17, 2019 15:15
mapa BLC con flechas
from math import atan
import folium, pandas as pd
import ast

# pd.read_html returns a list of every table found on the page; keep the first.
viajes = pd.read_html('http://quant.cl/blc_read')[0]
fm = folium.Map(location=[-33.41, -70.59], zoom_start=14,
                width=1000, height=400, tiles='stamenwatercolor')
for idx, row in viajes.iterrows():
    # SECURITY(review): these cells are text downloaded over plain HTTP, so
    # they must not go through eval(), which would execute whatever the page
    # contains.  ast.literal_eval only accepts Python literals (tuples, lists,
    # numbers, ...) and raises ValueError/SyntaxError on anything else.
    # The values arrive as strings, hence the parsing step.
    fromloc = ast.literal_eval(row['fromloc'])
    toloc = ast.literal_eval(row['toloc'])
@sergiolucero
sergiolucero / pics_to_ppt.py
Last active August 11, 2021 17:55
PPT compiler
from pptx import Presentation
from pptx.util import Inches
import glob
# Index used below to pick an entry from prs.slide_layouts; the constant's
# name says it is meant to be the title-and-content layout.
SLIDE_LAYOUT_TITLE_AND_CONTENT = 1
# Wildcard pattern for the pictures to include -- presumably passed to glob
# further down (that part of the script is not visible here).
FILTER = 'fotos/*.jpg' # what to include
# New presentation object created with Presentation()'s defaults.
prs = Presentation()
# Layout selected from the presentation by the index defined above.
slide_layout = prs.slide_layouts[SLIDE_LAYOUT_TITLE_AND_CONTENT]
@sergiolucero
sergiolucero / plotly_sunburst.py
Created July 24, 2019 18:35
sunburst graph in plot.ly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from IPython.display import HTML
# One (label, parent, value) row per sunburst sector, kept together so each
# sector's three attributes cannot drift out of alignment.  The first row has
# an empty parent string.
_sectors = [
    ("Carlina", "", 10),
    ("Alejandro", "Carlina", 14),
    ("Sylvia", "Carlina", 12),
    ("Daniel", "Sylvia", 10),
    ("Ingrid", "Sylvia", 2),
    ("Juan", "Carlina", 6),
    ("Caty", "Carlina", 6),
    ("Sergio", "Caty", 4),
    ("Laura", "Carlina", 4),
    ("Miguel", "Carlina", 6),
]
# Transpose the rows back into the three parallel lists passed to go.Sunburst.
_labels, _parents, _values = (list(column) for column in zip(*_sectors))

trace = go.Sunburst(
    labels=_labels,
    parents=_parents,
    values=_values,
    outsidetextfont={"size": 20, "color": "#377eb8"},
    marker={"line": {"width": 2}},
)
@sergiolucero
sergiolucero / spotifyme.py
Created August 8, 2019 02:24
my spotify playlists
import spotipy
import spotipy.util as util
from creds import CLIENT_ID, CLIENT_SECRET
# Request a token for the 'sergiolucero' account through spotipy's
# util.prompt_for_user_token.  The scope is passed as an empty string, and the
# client id/secret come from the local ``creds`` module imported above.
# NOTE(review): redirect_uri presumably has to match the URI registered for
# this Spotify application -- confirm before changing it.
token = util.prompt_for_user_token(username = 'sergiolucero', scope='',
client_id=CLIENT_ID,
client_secret=CLIENT_SECRET,
redirect_uri='http://quant.cl/what'
)
@sergiolucero
sergiolucero / google_places.py
Created August 27, 2019 03:10
google places
import requests, pandas as pd
from creds import GOOGLE_KEY
GEO_BASE = 'https://maps.googleapis.com/maps/api/geocode/json?address=%s&key=%s'
def georef(address): #1600+Amphitheatre+Parkway,+Mountain+View,+CA
url = GEO_BASE %(address, GOOGLE_KEY)
res = requests.get(url).json().get('results') #returns a list
df = pd.DataFrame(res)
@sergiolucero
sergiolucero / congreso_scraper.py
Created September 11, 2019 15:15
scraper congreso
import pandas as pd
import xml.etree.ElementTree as ET
import requests, sys
def datos(id):
url = f'https://www.senado.cl/wspublico/tramitacion.php?boletin={id}'
r=requests.get(url)
root = ET.fromstring(r.text)
if len(root):
pdatos = [root[0][0][ix].text for ix in [2,4,7]]
@sergiolucero
sergiolucero / asyncio_geocgr.py
Created October 8, 2019 01:05
asyncio scraping geocgr
import pickle, time
import asyncio
import concurrent.futures
import requests
url_base='https://www.contraloria.cl/opencgrapp/geocgr/api/comunas/%05d/newobras'
def get_comunas(region_id):
print(f'START {region_id} [{time.ctime()}]')
region_id = int(region_id)
@sergiolucero
sergiolucero / clustermap.py
Created November 7, 2019 12:07
Folium ClusterMap
import folium
from folium.plugins import MarkerCluster
# Base map and the cluster layer that will collect every marker.
fm = folium.Map(location=[40.72, -73.98], zoom_start=5)
mc = MarkerCluster()

# One marker per row of ``ndf`` (defined elsewhere), positioned from its
# 'lat' and 'lon' columns.
for _, point in ndf.iterrows():
    marker = folium.Marker(location=[point['lat'], point['lon']])
    mc.add_child(marker)

# Attach the filled cluster layer to the map once all markers are in it.
mc.add_to(fm)

# Bare expression: displays the map when this is the last line of a notebook cell.
fm
@sergiolucero
sergiolucero / serverl_pdf2csv.py
Last active March 20, 2021 23:18
código para pasar de un PDF del SERVEL (Plebiscito 2020) a formato CSV
import csv, glob, fitz
def pdf2csv(fn):
csv_fn = fn.replace('.pdf','.csv')
region = int(fn[1:3]) # A04101.pdf -> 4
with open(csv_fn,'w') as fw:
writer = csv.writer(fw)
writer.writerow(['nombre','rut','genero','direccion',