Skip to content

Instantly share code, notes, and snippets.

@sergiolucero
Created March 27, 2020 21:12
Show Gist options
  • Select an option

  • Save sergiolucero/9148b6628ae1a6a1a8f18efd275c29dd to your computer and use it in GitHub Desktop.

Select an option

Save sergiolucero/9148b6628ae1a6a1a8f18efd275c29dd to your computer and use it in GitHub Desktop.
Scraping cancioneros part 2
import requests, pickle, sys
from bs4 import BeautifulSoup
from docx import Document
from operator import methodcaller
def url_bs(url, timeout=30):
    """Download *url* and parse the response text with the html5lib parser.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the script forever.

    Returns:
        A ``BeautifulSoup`` object built from the response text.

    Raises:
        requests.RequestException: Propagated from ``requests.get`` (for
            example on a connection error or when the timeout expires).
    """
    # The HTTP status is deliberately not checked: callers inspect the parsed
    # page and decide for themselves what to do when content is missing.
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html5lib')
def recopila_acordes(artista = 'Manu Chao'):
    """Collect the text of every song page listed for an artist on lacuerda.net.

    The artist name is turned into a URL slug by lowercasing each
    whitespace-separated word and joining the words with underscores,
    e.g. "Manu Chao" -> "manu_chao".

    Args:
        artista: Artist name, words separated by whitespace.

    Returns:
        dict mapping song title to the text of the first ``<div id="t_body">``
        found on that song's page. Songs whose page cannot be downloaded or
        has no such element are reported on stdout and left out.

    Raises:
        IndexError: If the artist page contains no ``<ul class="b_main">``
            element (same exception type as an out-of-range ``[0]`` lookup).
    """
    fartist = '_'.join(word.lower() for word in artista.split())
    url = f'https://acordes.lacuerda.net/{fartist}/'

    listings = url_bs(url).find_all('ul', attrs={'class': 'b_main'})
    if not listings:
        raise IndexError(f'No song list found at {url}')

    # The title is the link text minus its last whitespace-separated token.
    # Building the title -> URL map first means that when two links share a
    # title only the last one is downloaded (the later link wins).
    # Anchors without an href cannot be followed and are skipped.
    song_urls = {
        ' '.join(link.text.split()[:-1]): url + link['href'] + '.shtml'
        for link in listings[0].find_all('a', href=True)
    }

    fetched = {}
    for song_name, song_url in song_urls.items():
        try:
            bodies = url_bs(song_url).find_all('div', attrs={'id': 't_body'})
        except requests.RequestException as err:
            # One unreachable page should not discard the rest of the songbook.
            print("Unexpected error:", type(err))
            continue
        try:
            fetched[song_name] = bodies[0].text
        except IndexError as err:
            # The page has no t_body element; report it and move on.
            print("Unexpected error:", type(err))

    print('Encontré %d canciones para %s' %(len(fetched), artista))
    return fetched
def make_ppt(data_canciones, artista):
    """Write the collected songs to ``<artista>.docx`` in the working directory.

    Despite the name, the output is a Word document built with ``Document``:
    a level-0 heading with the artist name, then for each song a level-1
    heading with its title followed by one paragraph holding its text.

    Args:
        data_canciones: Mapping of song title to song text.
        artista: Artist name; used as the document heading and file stem.
    """
    cancionero = Document()
    cancionero.add_heading(artista, 0)

    for title, chords in data_canciones.items():
        cancionero.add_heading(title, level=1)
        # Default paragraph style; 'Intense Quote' was considered as an alternative.
        cancionero.add_paragraph(chords)

    output_name = f'{artista}.docx'
    print('SAVING:', output_name)
    cancionero.save(output_name)
def compilar_cancionero(artista):
    """Build the songbook for one artist: scrape the songs, then save the document.

    Args:
        artista: Artist name, passed to both ``recopila_acordes`` and ``make_ppt``.
    """
    make_ppt(recopila_acordes(artista), artista)
if __name__ == '__main__':
    # Script entry point: compile one songbook per artist, in list order.
    ARTISTAS = [
        'Charly Garcia',
        'Fito Paez',
        'Soda Stereo',
        'Los Tres',
    ]
    for artist in ARTISTAS:
        compilar_cancionero(artist)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment