Skip to content

Instantly share code, notes, and snippets.

@helb
Created May 23, 2017 15:01
Show Gist options
  • Save helb/b799512bda26ec0dce32042c55da81ae to your computer and use it in GitHub Desktop.
Save helb/b799512bda26ec0dce32042c55da81ae to your computer and use it in GitHub Desktop.
lyricsgen
import requests
from random import choice
from bs4 import BeautifulSoup
import json
import peewee
# SQLite database file; the Artist model below is bound to it.
db = peewee.SqliteDatabase("lyricsgen.sqlite3")
# Parser name handed to BeautifulSoup for every page that gets fetched.
parser = "html.parser"
class Artist(peewee.Model):
    """Database row caching one artist's lyrics and the model built from them."""

    # Artist name; rows are looked up by this value and it must be unique.
    name = peewee.CharField(unique=True, index=True)
    # Number of songs whose lyrics were collected into ``text``.
    num_songs = peewee.IntegerField()
    # Lyrics of all collected songs, each followed by a newline.
    text = peewee.TextField()
    # JSON-serialised character model; NULL until a model has been generated.
    model = peewee.TextField(null=True)

    class Meta:
        # Bind this model to the module-level SQLite database.
        database = db
def getLyrics(artist_url):
    """Return all lyrics of an artist as one string, downloading if necessary.

    The lyrics are looked up in the ``Artist`` table first. When no row
    exists, the artist's song list is fetched from karaoketexty.cz, every
    linked song page is downloaded, and the combined text is stored in a new
    ``Artist`` row before being returned.

    Args:
        artist_url: The artist's name as it appears in the site's URL
            (despite the parameter name, callers pass the artist name).

    Returns:
        The lyrics of every song found, each followed by a newline.
    """
    # Use the argument instead of silently depending on a global variable
    # called ``artist_name``, which is what the body used to read.
    artist_name = artist_url

    try:
        artist = Artist.get(Artist.name == artist_name)
    except Artist.DoesNotExist:
        pass
    else:
        print("> Using songs from DB.")
        return artist.text

    print("> Fetching songs…")
    url = f"http://www.karaoketexty.cz/texty-pisni/{artist_name}"
    artist_page = BeautifulSoup(requests.get(url).content, parser)

    lyrics = []
    for song in artist_page.select("td.left a"):
        url = f"http://www.karaoketexty.cz/{song['href']}"
        song_page = BeautifulSoup(requests.get(url).content, parser)
        text_blocks = song_page.select("p.text")
        if not text_blocks:
            # A linked page without a lyrics paragraph must not abort the
            # whole download; skip it and keep what the other songs yield.
            continue
        lyrics.append(text_blocks[0].text + "\n")

    trainingText = "".join(lyrics)
    Artist.create(name=artist_name,
                  num_songs=len(lyrics),
                  text=trainingText,
                  model=None)
    print("> Songs saved to DB.")
    return trainingText
def getModel(artist_name, order):
    """Return the character-level model for an artist, building it if needed.

    The model maps every ``order``-character fragment of the artist's lyrics
    to a dict of ``{next_character: occurrence_count}``. A generated model is
    stored as JSON on the artist's ``Artist`` row and reused on later calls.

    Args:
        artist_name: Name of the artist whose lyrics are modelled.
        order: Length, in characters, of the fragments used as model keys.

    Returns:
        A dict of dicts: ``model[fragment][next_character] -> count``.
    """

    def generate_model():
        """Build the model from the lyrics and persist it on the artist row."""
        print("> Model not found, generating one…")
        text = getLyrics(artist_name)
        model = {}
        for i in range(len(text) - order):
            fragment = text[i:i + order]   # slice end is exclusive...
            nextLetter = text[i + order]   # ...so this is the following char
            followers = model.setdefault(fragment, {})
            followers[nextLetter] = followers.get(nextLetter, 0) + 1
        # getLyrics() has created the row by now if it did not exist before.
        artist = Artist.get(Artist.name == artist_name)
        artist.model = json.dumps(model)
        artist.save()
        print("> Model saved to DB.")
        return model

    try:
        artist = Artist.get(Artist.name == artist_name)
    except Artist.DoesNotExist:
        return generate_model()

    if artist.model is None:
        return generate_model()

    model = json.loads(artist.model)
    # The row does not record which order the cached model was built with.
    # Every key has the same length, so one key is enough to detect a model
    # built for a different order; such a model would make every lookup miss.
    sample_key = next(iter(model), None)
    if sample_key is not None and len(sample_key) != order:
        return generate_model()

    print("> Using model from DB.")
    return model
def getNextCharacter(model, fragment):
    """Pick a character to follow *fragment*, weighted by observed frequency.

    Args:
        model: Mapping ``fragment -> {next_character: occurrence_count}``.
        fragment: The fragment whose successor should be chosen.

    Returns:
        One of the characters recorded after *fragment*; a character seen
        more often is proportionally more likely to be returned.

    Raises:
        KeyError: If *fragment* is not a key of *model*.
    """
    # Imported locally because the file only imports ``choice`` from random.
    from random import choices

    followers = model[fragment]
    # Let the standard library do the weighting instead of materialising a
    # list containing each character once per occurrence.
    return choices(list(followers), weights=list(followers.values()))[0]
def generateLyrics(artist_name, order, length):
    """Generate new text in the style of an artist's lyrics.

    Starting from the first ``order`` characters of the artist's lyrics, the
    function repeatedly asks the model for a next character and slides the
    current fragment forward by one.

    Args:
        artist_name: Name of the artist to imitate.
        order: Fragment length used by the model.
        length: Target length; up to ``length - order`` characters are
            generated (the starting fragment itself is not part of the output).

    Returns:
        The generated characters as a single string. It is shorter than
        requested only when the lyrics are too short to seed the model.
    """
    model = getModel(artist_name, order)
    text = getLyrics(artist_name)
    seed = text[0:order]
    currentFragment = seed
    pieces = []
    print("> Generating lyrics…\n\n")
    for _ in range(length - order):
        if currentFragment not in model:
            # Dead end: this fragment has no recorded successor (e.g. it is
            # the very end of the corpus). Restart from the seed rather than
            # crashing with KeyError; give up if even the seed is unknown.
            if seed not in model:
                break
            currentFragment = seed
        newCharacter = getNextCharacter(model, currentFragment)
        pieces.append(newCharacter)
        currentFragment = currentFragment[1:] + newCharacter
    return "".join(pieces)
# Open the database and make sure the Artist table exists.
db.connect()
# ``safe=True`` makes peewee emit CREATE TABLE IF NOT EXISTS, so an existing
# table is fine while genuine operational errors are no longer swallowed.
db.create_tables([Artist], safe=True)

artist_name = input("Enter artist:\n")
newLyrics = generateLyrics(artist_name, 8, 600)
# Keep only what lies between the first and the last newline, dropping the
# partial lines at both ends of the generated text.
newLyrics = newLyrics[newLyrics.find("\n") + 1:newLyrics.rfind("\n")]
print(newLyrics)
import requests
from bs4 import BeautifulSoup
import sys
# Parser name handed to BeautifulSoup for every page that gets fetched.
parser = "html.parser"
def eprint(*args, **kwargs):
    """Print to standard error; accepts the same arguments as ``print``.

    The ``file`` keyword is supplied by this function and must not be passed.
    """
    error_stream = sys.stderr
    print(*args, file=error_stream, **kwargs)
def getLyrics(artist_name):
    """Download every song of an artist from texty.kompletne.cz.

    The artist page is fetched, each song link found on it is followed, and
    the text of the ``#text_center`` element of every song page is collected.
    Progress is reported on standard error.

    Args:
        artist_name: The artist's name as it appears in the site's URL.

    Returns:
        The lyrics of every song found, each followed by a newline.
    """
    url = f"http://texty.kompletne.cz/{artist_name}"
    artist_page = BeautifulSoup(requests.get(url).content, parser)

    lyrics = []
    for song in artist_page.select(".album_songy a"):
        eprint(f"processing {song['href']}")
        url = f"http://texty.kompletne.cz/{song['href']}"
        song_page = BeautifulSoup(requests.get(url).content, parser)
        containers = song_page.select("#text_center")
        if not containers:
            # One page without a lyrics element must not throw away the
            # songs that were already downloaded; report it and move on.
            eprint(f"no lyrics found in {song['href']}, skipping")
            continue
        lyrics.append(containers[0].text + "\n")
        # Running count of songs collected so far.
        eprint(len(lyrics))
    return "".join(lyrics)
# Command-line entry: the artist name is the first (required) argument.
if len(sys.argv) < 2:
    # Exit with a usage hint instead of an IndexError traceback.
    sys.exit(f"usage: {sys.argv[0]} ARTIST_NAME")
artist_name = sys.argv[1]
print(getLyrics(artist_name))
beautifulsoup4
flake8
requests
peewee
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment