Created
March 20, 2021 00:13
-
-
Save aydinemre/fb231dd0ecdaf077b3ee4d30dd6d964b to your computer and use it in GitHub Desktop.
Get all song lyrics for a single artist from sarkisozlerihd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# coding: utf-8
"""Download all song lyrics listed for a single artist on sarkisozlerihd.com.

Set ``artist`` below and run the script.  The artist page is fetched, every
song linked from it is visited (one request per second), and the result is
written to ``<artist>-lyrics.csv`` with the columns ``song`` and ``lyrics``.
"""
from time import sleep

import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm
from unidecode import unidecode

# Seconds to wait for the server before a request is abandoned; without a
# timeout a stalled connection would hang the script forever.
REQUEST_TIMEOUT = 30

# Name of the artist to scrape; must be filled in before running.
artist = ''


def _fetch_soup(url):
    """Fetch *url* and return the parsed page.

    Raises ``requests.RequestException`` on connection problems, timeouts,
    or a non-2xx HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html.parser')


def _scrape_song(song_url):
    """Return ``{'song': ..., 'lyrics': ...}`` for one song page.

    Returns ``None`` (after printing a note) when the page cannot be
    downloaded or does not contain the expected title/lyrics markup, so one
    bad page does not discard everything scraped so far.
    """
    try:
        song_soup = _fetch_soup(song_url)
    except requests.RequestException as err:
        tqdm.write(f"Skipping {song_url}: {err}")
        return None

    title_box = song_soup.find(class_="pull-left lyrictitlefix otto")
    heading = title_box.find('h1') if title_box is not None else None
    lyric_box = song_soup.find(class_='lyric-text margint20 marginb20')
    if heading is None or lyric_box is None:
        tqdm.write(f"Skipping {song_url}: unexpected page layout")
        return None

    # The lyrics are spread over several <p> tags; join them into one string.
    paragraphs = [p.getText().strip() for p in lyric_box.find_all('p')]
    return {'song': heading.getText(), 'lyrics': ' '.join(paragraphs).strip()}


if not artist:
    raise SystemExit("Set the `artist` variable to an artist name before running.")

# The site addresses artists by a lowercase, dash-separated slug that is
# passed through unidecode.
URL = f'https://www.sarkisozlerihd.com/sarkici/{unidecode("-".join(artist.split(" ")).lower())}'
print(f"URL --> {URL}")

soup = _fetch_soup(URL)
songs = soup.find_all(class_='list-line margint10 clearfix')

lyrics = []
pbar = tqdm(songs)
for song in pbar:
    link = song.find('a', href=True)
    if link is None:
        # List entry without a link: nothing to download.
        continue
    entry = _scrape_song(link.get('href'))
    if entry is not None:
        pbar.set_description(entry['song'])
        lyrics.append(entry)
    # Be polite to the server: at most one song request per second.
    sleep(1)

# Naming the columns explicitly keeps the frame usable when no song was found.
lyrics_df = pd.DataFrame(lyrics, columns=['song', 'lyrics'])

# Page titles include the artist name and the 'Şarkı Sözleri' suffix; strip
# both.  regex=False makes the artist name match literally even if it
# contains characters such as '.' or '('.
lyrics_df['song'] = lyrics_df['song'].str.replace(artist, '', regex=False)
lyrics_df['song'] = lyrics_df['song'].str.replace('Şarkı Sözleri', '', regex=False).str.strip()

lyrics_df.to_csv(f'{artist}-lyrics.csv', index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment