Skip to content

Instantly share code, notes, and snippets.

@melpomene
Created May 21, 2012 00:48
Show Gist options
  • Save melpomene/2760082 to your computer and use it in GitHub Desktop.
Save melpomene/2760082 to your computer and use it in GitHub Desktop.
Convert HTML songbook to JSON
""" A small script to crawl a website with some songs on it and put it in a JSON file format."""
import requests, re, json
# NOTE(review): DEBUG is a flag constant of the standard `re` module. This
# assignment rebinds that module attribute to True; the visible code never
# passes it to re.compile(), so confirm whether debug output was intended.
re.DEBUG = True
# Address of the page that is downloaded and scraped for songs.
URL = "http://www.hedin.mobi/sangbok/lista.php"
# Matches one song entry of the page:
#   <h2>title</h2><p>Melodi: melody</p><p>lyrics</p>
SONG_PATTERN = re.compile(r"<h2>(.+?)</h2><p>Melodi:(.*?)</p><p>(.+?)</p>")


def parse_songs(html):
    """Extract the songs found in the song book page markup.

    Args:
        html: The already-decoded text of the page.

    Returns:
        A list with one dict per matched song, in page order. Each dict has
        the keys "title", "credits", "melody" and "lyric"; "credits" is
        always the empty string.
    """
    # "." does not match a newline by default, so CRLF line breaks are
    # stripped first to let an entry that spans several lines match.
    flattened = html.replace("\r\n", "")
    return [
        {
            "title": title.replace("<br />", "\n"),
            "credits": "",
            # NOTE(review): melody and lyric get a literal backslash followed
            # by "n", whereas the title gets a real newline. Kept exactly as
            # in the original; confirm which form consumers of the JSON expect.
            "melody": melody.replace("<br />", "\\n"),
            "lyric": lyric.replace("<br />", "\\n"),
        }
        for title, melody, lyric in SONG_PATTERN.findall(flattened)
    ]


def main():
    """Download the song book page and write its songs to test.json.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(URL)
    # Fail loudly on an HTTP error instead of scraping an error page and
    # overwriting the output with an empty song list.
    response.raise_for_status()
    # Decode before any text processing: calling replace() with str
    # arguments on the raw bytes raises TypeError on Python 3.
    page = response.content.decode("iso8859-1")
    json_string = json.dumps(parse_songs(page))
    # The payload is encoded to bytes, so the file is opened in binary mode;
    # the context manager guarantees the handle is closed.
    with open("test.json", "wb") as f:
        f.write(json_string.encode("utf8"))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment