Created
March 27, 2020 21:12
-
-
Save sergiolucero/9148b6628ae1a6a1a8f18efd275c29dd to your computer and use it in GitHub Desktop.
Scraping cancioneros part 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests, pickle, sys | |
| from bs4 import BeautifulSoup | |
| from docx import Document | |
| from operator import methodcaller | |
def url_bs(url, timeout=30):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to download.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``
            argument, so a server that never answers cannot block the
            scrape indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response text with the
        ``html5lib`` parser.

    The HTTP status code is not checked, so an error page is parsed like any
    other response; callers detect it by the absence of the elements they
    look for.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html5lib')
def recopila_acordes(artista='Manu Chao'):
    """Collect the chord sheets listed for *artista* on acordes.lacuerda.net.

    The artist name is lower-cased and its words are joined with underscores
    to build the artist page URL ("Manu Chao" -> "manu_chao").  Every link in
    the first ``<ul class="b_main">`` of that page is treated as a song: the
    song page is downloaded and the text of its first ``<div id="t_body">``
    is kept.

    Args:
        artista: Artist name with words separated by whitespace.

    Returns:
        dict mapping a song title (the link text without its last
        whitespace-separated word) to the text of the song page's ``t_body``
        element.  Songs whose page has no such element are reported on stdout
        and left out of the result.

    Raises:
        IndexError: if the artist page contains no ``<ul class="b_main">``.
    """
    fartist = '_'.join(word.lower() for word in artista.split())
    url = f'https://acordes.lacuerda.net/{fartist}/'

    song_lists = url_bs(url).find_all('ul', attrs={'class': 'b_main'})
    if not song_lists:
        # Same exception type as indexing the empty result, but with a
        # message that says what was missing and where.
        raise IndexError(f'no song list (ul.b_main) found at {url}')

    songs = [
        (link.text, url + link['href'] + '.shtml')
        for link in song_lists[0].find_all('a')
    ]

    # All pages are downloaded before any is inspected.  The dict key drops
    # the last word of the link text, so titles that become equal after
    # trimming overwrite one another (the last page wins).
    # NOTE(review): presumably the dropped word is a trailing marker in the
    # link text - confirm against the live page.
    fetch = {
        ' '.join(title.split()[:-1]): url_bs(song_url)
        for title, song_url in songs
    }

    fetched = {}
    for song_name, song_page in fetch.items():
        fv = song_page.find_all('div', attrs={'id': 't_body'})
        if not fv:
            # No chord body on this page: report which song and move on
            # instead of hiding every possible exception behind a bare except.
            print('Unexpected error: no t_body in page for', repr(song_name))
            continue
        fetched[song_name] = fv[0].text

    print(f'Encontré {len(fetched)} canciones para {artista}')
    return fetched
def make_ppt(data_canciones, artista):
    """Write the songs of *artista* into a Word document.

    Despite its name, this function produces a ``.docx`` file (via
    ``docx.Document``), not a presentation.  The document starts with the
    artist name as a level-0 heading, followed by one level-1 heading and one
    paragraph per song.

    Args:
        data_canciones: dict mapping song title to the song's chord text.
        artista: Artist name; used as the document title and as the base of
            the output file name.

    Returns:
        None.  The document is saved as ``<artista>.docx`` relative to the
        current working directory, with path separators in the name replaced
        by underscores.
    """
    document = Document()
    document.add_heading(artista, 0)
    for songname, song_chords in data_canciones.items():
        document.add_heading(songname, level=1)
        # Default paragraph style; style='Intense Quote' was the alternative
        # noted by the original author.
        document.add_paragraph(song_chords)

    # A separator inside the artist name (e.g. "AC/DC") would otherwise be
    # interpreted as a directory component of the output path.
    safe_name = artista.replace('/', '_').replace('\\', '_')
    fn = f'{safe_name}.docx'
    print('SAVING:', fn)
    document.save(fn)
def compilar_cancionero(artista):
    """Build the songbook for one artist.

    Scrapes the artist's songs with ``recopila_acordes`` and passes the
    resulting dict, together with the artist name, to ``make_ppt``.
    """
    make_ppt(recopila_acordes(artista), artista)
if __name__ == '__main__':
    # Artists for which a songbook is compiled when the file runs as a script.
    ARTISTAS = [
        'Charly Garcia',
        'Fito Paez',
        'Soda Stereo',
        'Los Tres',
    ]
    for artist in ARTISTAS:
        compilar_cancionero(artist)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment