Created
August 6, 2013 11:35
-
-
Save DingK-R/6163753 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
# vim: set fileencoding=utf8 | |
import re, sys, os, urllib, urllib2, random | |
from mutagen.id3 import ID3,TRCK,TIT2,TALB,TPE1,APIC | |
from HTMLParser import HTMLParser | |
# Cookie header sent with every request.  The member_auth value is empty here;
# presumably the user pastes their own xiami.com login cookie -- TODO confirm.
cookie = 'member_auth='

# Shared urllib2 opener that sends browser-like headers plus the cookie above.
opener = urllib2.build_opener()
opener.addheaders = [
    ('Accept',
     'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ('User-Agent',
     'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36'),
    ('Cookie', cookie),
]

# ANSI escape template: bold text in colour code %d, then reset attributes.
s = '\x1b[1;%dm%s\x1b[0m'

# Patterns that pull individual fields out of one <track> of the playlist XML.
re_id = re.compile(r'song_id>(\d+)<')
re_title = re.compile(r'title><!\[CDATA\[(.+?)\]\]')
re_artist = re.compile(r'artist><!\[CDATA\[(.+?)\]\]')
re_album_name = re.compile(r'album_name><!\[CDATA\[(.+?)\]\]')
re_pic = re.compile(r'pic>(.+?)<')

# Pattern for the obfuscated "location" value in the gethqsong response.
re_song = re.compile(r'"location":"(.+?)"')

# Used only for its unescape() method (HTML entity decoding of file names).
parser = HTMLParser()

# URL / command templates.
template_info = 'http://www.xiami.com/song/playlist/id/%s/type/%s'  # (id, type code)
template_parse = 'http://www.xiami.com/song/gethqsong/sid/%s'  # (song id)
template_wgets = 'wget -nv -c -U Mozilla/5.0 -O "%s" %s'  # (output file, url)
def decry(row, encryed_url):
    """Decode an obfuscated xiami.com location string into a plain URL.

    The obfuscated text is a character grid written out row by row; the
    plain text is recovered by reading that grid column by column.  When the
    length is not a multiple of the row count, the first ``len % rows`` rows
    are one character longer than the others.  The result is then
    percent-decoded and every '^' is replaced by '0'.

    Args:
        row: Number of grid rows (an int, or a string of digits).
        encryed_url: The obfuscated text without its leading row digit.

    Returns:
        The decoded URL string.

    Raises:
        ValueError: If ``row`` is not an integer or is not positive.
    """
    # Function-scope import so the block works on Python 2 and Python 3.
    try:
        from urllib import unquote  # Python 2
    except ImportError:
        from urllib.parse import unquote  # Python 3

    rows = int(row)
    if rows <= 0:
        raise ValueError('row count must be positive, got %r' % (row,))

    total = len(encryed_url)
    # cols_base: columns every row has; rows_ex: rows with one extra column.
    cols_base, rows_ex = divmod(total, rows)

    matrix = []
    start = 0
    for r in range(rows):
        length = cols_base + 1 if r < rows_ex else cols_base
        matrix.append(encryed_url[start:start + length])
        start += length

    # Column-major read-out; '//' keeps the index an int on every Python.
    plain = ''.join(matrix[i % rows][i // rows] for i in range(total))
    return unquote(plain).replace('^', '0')
def song_infos(code, tp):
    """Fetch a xiami.com playlist and return the metadata of its tracks.

    Args:
        code: Numeric id of the collection / album / artist / song.
        tp: Playlist type code inserted into ``template_info``.

    Returns:
        A list of dicts, one per ``<track>`` in the response, with the keys
        ``id_``, ``title``, ``artist``, ``album_name`` and ``pic``.  A field
        that is absent from a track is returned as the empty string instead
        of aborting the whole listing.
    """
    response = opener.open(template_info % (code, tp))
    try:
        api_xml = response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()

    def first(pattern, text):
        # First capture of `pattern` in `text`, stripped; '' when absent.
        found = pattern.search(text)
        return found.group(1).strip() if found else ''

    infos = []
    # Everything before the first '<track>\n' is the playlist header.
    for chunk in api_xml.split('<track>\n')[1:]:
        infos.append({
            'id_': first(re_id, chunk),
            'title': first(re_title, chunk),
            'artist': first(re_artist, chunk),
            'album_name': first(re_album_name, chunk),
            # Drop the trailing "_<digit>" size suffix from the cover URL.
            'pic': re.sub(r'_\d\.jpg$', '.jpg', first(re_pic, chunk)),
        })
    return infos
def modified_sname(sname):
    """Turn a raw "<nr>.<title> - <artist>.mp3" name into a usable file name.

    HTML entities are decoded, '/' becomes ' - ', backslashes are removed and
    double quotes are backslash-escaped (the name is later placed inside
    double quotes on a command line).  The result is a UTF-8 byte string
    (Python 2 ``str``) shorter than 250 bytes; longer names are cut and
    end in '...mp3'.

    Args:
        sname: Candidate file name, as UTF-8 bytes or unicode.

    Returns:
        The cleaned-up name as a UTF-8 encoded byte string.
    """
    # Decode first: unescaping entities inside a non-ASCII byte string would
    # mix str and unicode and fail with UnicodeDecodeError.
    if isinstance(sname, bytes):
        sname = sname.decode('utf8', 'replace')

    sname = parser.unescape(sname)
    sname = sname.replace('/', ' - ')
    sname = sname.replace('\\', '')
    sname = sname.replace('"', '\\"')
    sname = sname.encode('utf8')

    if len(sname) < 250:
        return sname

    # Cut on a character boundary: a plain byte slice could leave half of a
    # multi-byte UTF-8 sequence at the end of the name.
    head = sname[:243].decode('utf8', 'ignore').encode('utf8')
    return head + '...mp3'
def modified_id3(sname, info, track):
    """Write a fresh ID3 tag to the file ``sname``.

    Args:
        sname: Path of the mp3 file to tag.
        info: Track dict with the UTF-8 encoded byte-string keys ``title``,
            ``album_name``, ``artist`` and ``pic`` (cover image URL).
        track: Track number as a string.
    """
    id3 = ID3()
    # encoding=3 selects UTF-8 text frames.
    id3.add(TRCK(encoding=3, text=track))
    id3.add(TIT2(encoding=3, text=info['title'].decode('utf8')))
    id3.add(TALB(encoding=3, text=info['album_name'].decode('utf8')))
    id3.add(TPE1(encoding=3, text=info['artist'].decode('utf8')))
    # The cover is stored as a link, not as image bytes: ID3 marks that case
    # with the pseudo MIME type "-->" (the former '->' was not recognised).
    # type=3 is the "front cover" picture type.
    id3.add(APIC(encoding=3, mime='-->', type=3, desc=u'Cover', data=info['pic']))
    id3.save(sname)
def download(code, tp):
    """Download every track of a xiami.com playlist with wget and tag it.

    For each track the obfuscated location is fetched and decoded, the file
    is downloaded as "<nr>.<title> - <artist>.mp3" into the current
    directory, and an ID3 tag is written.  The loop stops at the first wget
    failure other than exit code 8 (server error response).

    wget is started without a shell, so characters such as '&', '$' or a
    backquote in a title or URL cannot alter the command.  The status that
    is checked and printed is therefore wget's plain exit code.

    Args:
        code: Numeric id of the collection / album / artist / song.
        tp: Playlist type code, passed through to ``song_infos``.
    """
    import subprocess  # function-scope import keeps this block self-contained

    infos = song_infos(code, tp)
    # Pad track numbers to the width of the largest one (also for >= 1000
    # tracks) so that the files sort in playlist order.
    width = len(str(len(infos)))

    # NOTE(review): numbering follows the playlist position, so a skipped
    # track leaves a gap -- confirm this is the intended behaviour.
    for number, info in enumerate(infos, 1):
        if info['id_'] == '':
            continue

        sname = modified_sname(
            str(number).zfill(width) + '.' + info['title'] + ' - '
            + info['artist'] + '.mp3')

        reply = opener.open(template_parse % info['id_']).read()
        located = re_song.search(reply)
        if located is None:
            print('\n\n ----### ERROR ==> no location for song %s ###--- \n\n'
                  % info['id_'])
            continue
        # First character is the row count, the rest is the scrambled URL.
        payload = located.group(1)
        durl = decry(payload[0], payload[1:])

        # Random bright colour (ANSI codes 90-96) for the progress line.
        num = random.randint(0, 100) % 7
        col = s % (num + 90, sname)
        print('\n ++ 正在下载: %s' % col)

        # modified_sname() backslash-escapes double quotes for shell use; no
        # shell is involved here, so undo that to get the real file name.
        fname = sname.replace('\\"', '"')
        wget = ['wget', '-nv', '-c', '-U', 'Mozilla/5.0', '-O', fname, durl]
        try:
            status = subprocess.call(wget)
        except OSError:
            status = 127  # wget could not be started at all

        if status not in (0, 8):  # 8: server issued an error response
            print('\n\n ----### ERROR ==> %d ###--- \n\n' % status)
            print(' ===>  %s' % ' '.join(wget))
            break

        modified_id3(fname, info, str(number))
def main(url):
    """Download whatever a xiami.com URL points at.

    The URL kind is recognised by a path fragment, checked in this order:
    'showcollect/' (type '3'), 'album/' (type '1'), 'artist/' (type '2');
    anything else is treated as a single song (type '0').

    Args:
        url: A xiami.com collection, album, artist or song URL.

    Raises:
        ValueError: If no numeric id can be found in ``url``.
    """
    routes = (
        # (marker in url, id pattern, playlist type code)
        ('showcollect/', r'showcollect/id/(\d+)', '3'),
        ('album/', r'album/(\d+)', '1'),
        ('artist/', r'artist/(\d+)', '2'),  # top 20 popular songs of the artist
    )

    # Default: a single song.
    pattern, tp = r'song/(\d+)', '0'
    for marker, marker_pattern, marker_tp in routes:
        if marker in url:
            pattern, tp = marker_pattern, marker_tp
            break

    found = re.search(pattern, url)
    if found is None:
        raise ValueError('no xiami.com id found in url: %r' % (url,))
    download(found.group(1), tp)
if __name__ == '__main__':
    # Expected call: <script> -a <url>
    argv = sys.argv
    # Check the length first so that missing arguments show the usage text
    # instead of raising IndexError.
    if len(argv) >= 3 and argv[1] == '-a' and argv[2]:
        main(argv[2])
    else:
        print('Usage:\n -a url download any list of xiami.com')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment