Created
February 6, 2012 02:05
-
-
Save scturtle/1748942 to your computer and use it in GitHub Desktop.
Download DoubanFM favourite songs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
import os, urllib, urllib2, re, json
# Playlist endpoint; %s is filled with the album's numeric subject id.
# The response is JSON whose 'song' list mixes albums, filtered later.
baseurl = 'http://douban.fm/j/mine/playlist?type=n&h=&channel=0&context=channel:0|subject_id:%s'
# Destination directory for downloaded mp3 files.
musicdir = os.path.abspath('./music')
# Extracts the first run of digits (the subject id) from an album URL.
num_pattern = re.compile(r'(\d+)')
def get_json_by_album(href):
    '''Resolve an album page URL into the song JSON objects (with direct
    mp3 urls) that belong to that album.'''
    # Pull the numeric subject id out of the album href.
    subject_id = num_pattern.search(href).groups()[0]
    raw = urllib2.urlopen(baseurl % subject_id).read()
    payload = json.loads(raw)
    # The playlist endpoint mixes songs from several albums; keep only
    # those whose album path ends with this subject id.
    same_album = lambda entry: entry['album'].endswith(subject_id + '/')
    return filter(same_album, payload['song'])
def download(song):
    '''Download one song by its JSON object, then write correct ID3 information.

    song: dict with 'title', 'url', 'picture', 'albumtitle' and 'artist'
    keys, as returned by get_json_by_album.  Does nothing when the target
    mp3 already exists.
    '''
    if not os.path.exists(musicdir):
        os.mkdir(musicdir)
    # Titles may contain '/', which os.path.join would treat as a path
    # separator and break the download target.
    filename = '%s.mp3' % song['title'].replace('/', '_')
    filepath = os.path.join(musicdir, filename)
    if os.path.exists(filepath):
        return
    urllib.urlretrieve(song['url'], filepath)
    # Fetch the cover image next to the mp3 so it can be embedded.
    picname = song['picture'][1 + song['picture'].rindex('/'):]
    picpath = os.path.join(musicdir, picname)
    urllib.urlretrieve(song['picture'], picpath)
    import eyeD3
    tag = eyeD3.Tag()
    tag.link(filepath)
    tag.header.setVersion(eyeD3.ID3_V2_3)
    tag.encoding = '\x01'  # ID3v2 text encoding marker (UTF-16)
    tag.setTitle(song['title'])
    tag.setAlbum(song['albumtitle'])
    tag.setArtist(song['artist'])
    tag.addImage(3, picpath, u'')  # 3 = front cover
    try:
        tag.update()
    finally:
        # The image bytes live in the tag frame after addImage; delete the
        # temp file even if writing the tag fails, so it cannot leak.
        os.remove(picpath)
if __name__ == '__main__': | |
from pprint import pprint | |
songs = get_json_by_album('http://music.douban.com/subject/3649821/') | |
#pprint(songs) | |
print 'Found: %d' % len(songs) | |
for song in songs: | |
download(song) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# ======================================================= | |
faillist = file('faillist.txt').read().split('\n') | |
faillist = map(lambda line: line.decode('utf-8'), faillist) | |
from fmdownload import * | |
# ======================================================= | |
import sys, urllib2, cookielib | |
from BeautifulSoup import BeautifulSoup | |
from time import sleep | |
from pprint import pprint | |
import HTMLParser | |
hp = HTMLParser.HTMLParser() | |
ue = lambda s: hp.unescape(s) | |
# cookies | |
cookie = 'flag="ok";ck="XXXX";dbcl2="XXXXXXXXXXXXXXX";bid="XXXXXXXXX";' | |
# song per page | |
spp = 15 | |
baseurl = 'http://douban.fm/mine?start=%d&type=liked' | |
pages = int(raw_input('number of pages: ')) | |
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar())) | |
urllib2.install_opener(opener) | |
for i in range(pages): | |
sleep(1) | |
req = urllib2.Request(baseurl % (i*spp,)) | |
req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)') | |
req.add_header('Cookie', cookie) | |
content = urllib2.urlopen(req).read() | |
soup = BeautifulSoup(str(content)) | |
divs = soup.findAll('div', {'class': 'song_info'}) | |
for div in divs: | |
p = div.findAll('p') | |
name = u'%s %s' % (ue(p[0].string), ue(p[1].string)) | |
if name in faillist: | |
songs = get_json_by_album(str(p[2].a)) | |
found = False | |
for i in range(10): | |
for song in songs: | |
if ue(p[0].string) == song['title']: | |
download(song) | |
#print 'DONE:', name.encode('utf-8') | |
found = True | |
break | |
if not found: | |
if i == 9: | |
print 'FAIL:', name.encode('utf-8') | |
else: | |
break |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
import os, urllib, urllib2, json, re
# Original idea / API usage borrowed from this project:
ref = 'https://github.com/alswl/tingdownload'
# Baidu music suggestion API; the response is JSONP wrapped in
# window.baidu.sug(...), stripped by search() before json.loads.
SEARCH_URL = u'http://openapi.baidu.com/public/2.0/mp3/info/suggestion?' \
    'format=json&word=%s&callback=window.baidu.sug'
DOWNLOAD_URL = u'http://ting.baidu.com/song/%s/download'
# First href on the download page points at the actual file.
pattern = re.compile(r'<a href="([^"]*)"')
TARGET_URL = u'http://ting.baidu.com%s'
# Destination directory for downloaded songs.
MUSICS_DIR = os.path.abspath('./musics')
def search(word):
    '''Search musics on Baidu ting and return the parsed suggestion JSON.'''
    quoted = urllib2.quote(word)
    body = urllib2.urlopen(SEARCH_URL % quoted).read()
    # Response is JSONP: window.baidu.sug({...}); — drop the 17-char
    # callback prefix and the trailing ');'.
    stripped = body.strip()
    return json.loads(stripped[17:-2])
def geturl(songid):
    '''Resolve a song id into its direct download URL.'''
    page_html = urllib2.urlopen(DOWNLOAD_URL % songid).read()
    # The first anchor on the download page carries the file link.
    match = pattern.search(page_html)
    return TARGET_URL % match.groups()[0]
def download(url): | |
''' download song by url ''' | |
if not os.path.exists(MUSICS_DIR): | |
os.mkdir(MUSICS_DIR) | |
name = urllib2.unquote(url)[1+urllib2.unquote(url).rindex('/'):] | |
#name = name.decode('utf-8') | |
name = ''.join(map(chr,map(ord,list(name)))) # Orz | |
print 'downloading:', name | |
filepath = os.path.join(MUSICS_DIR, name) | |
if not os.path.exists(filepath): | |
urllib.urlretrieve(url, filepath) | |
if __name__ == '__main__': | |
print 'Downloading DoubanFM favs...' | |
songs = file('favs.txt').read().strip().split('\n') | |
print len(songs), 'songs' | |
faillist = [] | |
for line in songs: | |
res = search(line) | |
if not res['song']: | |
print 'FAIL:', line | |
faillist.append(line) | |
continue | |
download(geturl(res['song'][0]['songid'])) | |
print 'DONE:', line | |
with file('faillist.txt','w') as f: | |
f.write('\n'.join(faillist)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import sys, urllib2, cookielib | |
from BeautifulSoup import BeautifulSoup | |
from time import sleep | |
import HTMLParser | |
hp = HTMLParser.HTMLParser() | |
ue = lambda s: hp.unescape(s) | |
# cookies | |
cookie = 'flag="ok";ck="XXXX";dbcl2="XXXXXXXXXXXXXXX";bid="XXXXXXXXX";' | |
# song per page | |
spp = 15 | |
baseurl = 'http://douban.fm/mine?start=%d&type=liked' | |
pages = int(raw_input('number of pages: ')) | |
result = 0 | |
# output to file | |
reload(sys) | |
sys.setdefaultencoding(sys.getfilesystemencoding()) | |
originout = sys.stdout | |
sys.stdout = open('favs.txt','w') | |
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar())) | |
urllib2.install_opener(opener) | |
for i in range(pages): | |
sleep(1) | |
req = urllib2.Request(baseurl % (i*spp,)) | |
req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)') | |
req.add_header('Cookie', cookie) | |
content = urllib2.urlopen(req).read() | |
soup = BeautifulSoup(str(content)) | |
divs = soup.findAll('div', {'class': 'song_info'}) | |
result += len(divs) | |
for div in divs: | |
p = div.findAll('p') | |
print ue(p[0].string), ue(p[1].string)#, ue(p[2].a.string) | |
sys.stdout = originout | |
print 'Done with %d results.' % result |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment