Skip to content

Instantly share code, notes, and snippets.

@aydinemre
Created March 20, 2021 00:13
Show Gist options
  • Save aydinemre/fb231dd0ecdaf077b3ee4d30dd6d964b to your computer and use it in GitHub Desktop.
Save aydinemre/fb231dd0ecdaf077b3ee4d30dd6d964b to your computer and use it in GitHub Desktop.
Get all song lyrics for a single artist from sarkisozlerihd
#!/usr/bin/env python
# coding: utf-8
from time import sleep
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm
from unidecode import unidecode
# Name of the artist whose songs should be scraped. It is left empty here as a
# placeholder; fill it in before running the script.
artist = ''

# The URL slug is the artist name with spaces replaced by hyphens, lower-cased,
# and transliterated to ASCII by unidecode.
artist_slug = unidecode("-".join(artist.split(" ")).lower())
URL = f'https://www.sarkisozlerihd.com/sarkici/{artist_slug}'
print(f"URL --> {URL}")

# Without a timeout requests can block indefinitely on an unresponsive server.
page = requests.get(URL, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page and finding no songs.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

# One element per song entry on the artist page; each is expected to contain a
# link to the song's own page.
songs = soup.find_all(class_='list-line margint10 clearfix')
# Visit every song page found on the artist page and collect one row per song:
# {'song': <text of the page's <h1> title>, 'lyrics': <paragraphs joined by spaces>}.
lyrics = []
pbar = tqdm(songs)
for song in pbar:
    link = song.find('a', href=True)
    if link is None:
        # Entry without a link: nothing to fetch, so no request and no delay.
        continue
    song_href = link.get('href')

    # Without a timeout a single stalled request would hang the whole scrape.
    song_page = requests.get(song_href, timeout=30)
    song_soup = BeautifulSoup(song_page.content, 'html.parser')

    title_box = song_soup.find(class_="pull-left lyrictitlefix otto")
    heading = title_box.find('h1') if title_box is not None else None
    lyric_box = song_soup.find(class_='lyric-text margint20 marginb20')

    if heading is None or lyric_box is None:
        # The page lacks the expected markup (e.g. an error page or a layout
        # change). Skip it rather than crash and lose everything collected so far.
        tqdm.write(f"Skipping {song_href}: title or lyrics block not found")
    else:
        song_name = heading.getText()
        song_lyrics = ' '.join(
            paragraph.getText().strip() for paragraph in lyric_box.find_all('p')
        ).strip()
        pbar.set_description(song_name)
        lyrics.append({'song': song_name, 'lyrics': song_lyrics})

    # Pause between requests so the site is not hit in rapid succession.
    sleep(1)
# Build the result table. Naming the columns explicitly keeps the 'song' and
# 'lyrics' columns present even when no songs were collected.
lyrics_df = pd.DataFrame(lyrics, columns=['song', 'lyrics'])

# Reduce the page title to the bare song name by removing the artist name and
# the 'Şarkı Sözleri' text, then trimming whitespace. regex=False makes both
# replacements literal, so an artist name containing regex metacharacters
# ('.', '(', '+', ...) is matched as written on every pandas version.
lyrics_df['song'] = (
    lyrics_df['song']
    .str.replace(artist, '', regex=False)
    .str.replace('Şarkı Sözleri', '', regex=False)
    .str.strip()
)

# Write one CSV per artist, without the DataFrame index column.
lyrics_df.to_csv(f'{artist}-lyrics.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment