Created
September 6, 2020 21:46
-
-
Save acarril/734411ba2aa7946698d17d10a56537f1 to your computer and use it in GitHub Desktop.
Scrape download links with CMF data (Comisión nacional para el Mercado Financiero, Chile)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3
"""Scrape the links found in a CMF Chile article page and save them to a CSV.

The script downloads one article page, locates the ``<div>`` that holds the
article body, collects the ``href`` of every anchor inside it, and writes the
links to ``cmf_links.csv`` with one link per line.
"""
import csv

import bs4
import requests

# Setup
url = 'http://www.cmfchile.cl/portal/principal/605/w3-article-25080.html'

# NOTE(review): 'Host' and 'Referer' name www.svs.cl while `url` points at
# www.cmfchile.cl, and 'Accept' advertises CSS first. These values look like
# they were copied from a browser request for a stylesheet. They are kept
# unchanged because the server's behaviour without them is not known from
# this file -- confirm whether the 'Host' override is still required.
headers = {
    'Host': 'www.svs.cl',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36',
    'Accept': 'text/css,*/*;q=0.1',
    'Referer': 'http://www.svs.cl/portal/css/screen.css',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.9',
}

# id of the <div> that wraps the article body containing the links.
ARTICLE_DIV_ID = 'article_i__w3_ar_articuloCompleto_1'
# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on an unresponsive host.
REQUEST_TIMEOUT = 30
OUTPUT_PATH = 'cmf_links.csv'

# Request and process website
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were
# the article.
response.raise_for_status()
page_soup = bs4.BeautifulSoup(response.text, 'html.parser')

article_div = page_soup.find('div', {'id': ARTICLE_DIV_ID})
if article_div is None:
    # find() returns None when nothing matches; report that explicitly rather
    # than crashing with an AttributeError on `.find_all`.
    raise SystemExit(
        f"Could not find <div id={ARTICLE_DIV_ID!r}> in {url}; "
        "the page layout may have changed."
    )
# href=True keeps only anchors that actually carry an href attribute, so no
# None values (which would become blank lines) reach the output.
link_soup = article_div.find_all('a', href=True)

# Collect links
links = [link['href'] for link in link_soup]

# Write links
# One link per row. newline='' lets the csv module control line endings
# itself, and lineterminator='\n' gives the same LF-separated layout on every
# platform. Links containing commas or quotes are quoted by the writer.
with open(OUTPUT_PATH, 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file, lineterminator='\n')
    writer.writerows([link] for link in links)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment