Skip to content

Instantly share code, notes, and snippets.

@sergiolucero
Created July 17, 2019 03:55
Show Gist options
  • Save sergiolucero/227dd4dfb7710458b1d314bd94c31fa3 to your computer and use it in GitHub Desktop.
Save sergiolucero/227dd4dfb7710458b1d314bd94c31fa3 to your computer and use it in GitHub Desktop.
import requests, pandas as pd
from bs4 import BeautifulSoup
# Default listing page fetched by scrape_yapo(): a yapo.cl search under
# chile/inmuebles with the query string q=casa.
CLIO_DEF = 'https://www.yapo.cl/chile/inmuebles?ca=15_s&l=0&q=casa&cmn=&st=a'
def text_search(bs, classname):
    """Return the stripped text of every ``<span>`` with class *classname*.

    Args:
        bs: Parsed document exposing ``find_all`` (a ``BeautifulSoup``
            object when called from ``scrape_yapo``).
        classname: Value of the ``class`` attribute to look for.

    Returns:
        A list with one whitespace-stripped string per matching span, in
        the order ``find_all`` yields them.
    """
    matches = bs.find_all('span', attrs={'class': classname})
    cleaned = []
    for tag in matches:
        cleaned.append(tag.text.strip())
    return cleaned
def scrape_yapo(url=CLIO_DEF, max_rows=47):
    """Scrape one yapo.cl listing page into a DataFrame.

    Args:
        url: Listing page to download; defaults to ``CLIO_DEF``.
        max_rows: Maximum number of entries kept for each scraped column.

    Returns:
        A ``pandas.DataFrame`` with the columns ``descripción``, ``region``,
        ``comuna``, ``precio``, ``UF``, ``m2`` and ``UF_m2``. ``UF_m2`` is a
        string of the form ``"<UF integer part>/<m2 figure>"``; the division
        is not evaluated.

    Raises:
        ValueError: Raised by pandas when the scraped columns do not all
            have the same length.
        IndexError: If a ``UF`` entry has no second space-separated token.
    """
    # Without a timeout, requests may wait forever on an unresponsive server.
    page = requests.get(url, timeout=30)
    bs = BeautifulSoup(page.text, 'lxml')

    precios = text_search(bs, 'price')[:max_rows]
    cprecios = text_search(bs, 'convertedPrice')[:max_rows]
    regiones = text_search(bs, 'region')[:max_rows]
    comunas = text_search(bs, 'commune')[:max_rows]

    # Only the icon texts mentioning 'm2' are kept, and the list is trimmed
    # to the number of communes so the column lengths line up.
    metros = [m for m in text_search(bs, 'icons__element-text')
              if 'm2' in m][:len(comunas)]

    # Cap the titles like every other column; left uncapped they can outnumber
    # the rest and make the DataFrame constructor fail.
    descs = [a.text for a in bs.find_all('a', attrs={'class': 'title'})]
    descs = descs[:max_rows]

    # Column lengths, printed to help diagnose a length mismatch.
    print([len(x) for x in [precios, cprecios, regiones, comunas, metros]])

    df = pd.DataFrame(dict(descripción=descs, region=regiones, comuna=comunas,
                           precio=precios, UF=cprecios, m2=metros))

    # UF text: take the second space-separated token, drop everything from
    # the first ',' and remove '.' characters; m2 text: take the first token.
    df['UF_m2'] = [
        uf.split(' ')[1].split(',')[0].replace('.', '') + '/' + m2.split(' ')[0]
        for uf, m2 in zip(cprecios, metros)
    ]
    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment