#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" | |
dependency: | |
you have to install beautifulsoup4 modulex | |
sudo pip install beautifulsoup4 | |
description: | |
ximalaya downloading program made by Meyou(Wuhan) --2015.4.27 | |
feel free to use it | |
usage: | |
1:just one page to download | |
python xmly.py album_url | |
2:many pages to download | |
python xmly.py album_url start_page_number end_page_number | |
notice: | |
after finishing downloading the album ,please remove the temporary json file. | |
""" | |
import sys
import os
import json
import urllib
from urllib import urlopen
from bs4 import BeautifulSoup
def get_ids(filename):
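    """Return the list of sound ids for an album page (a local html file or a
    http://www... url); the ids are read from the "sound_ids" attribute of
    the "personal_body" div."""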
if "www" in filename: | |
fd=urllib.urlopen(filename) | |
else: | |
fd=open(filename) | |
#print filename | |
soup=BeautifulSoup(fd.read()) | |
newlist=soup.find("div", { "class" : "personal_body" }) | |
allids=newlist.get("sound_ids") | |
idlist=allids.split(",") | |
return idlist | |
def dlone(id):
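    """Download one track by id: fetch its json metadata (cached under ./json/)
    and then download the mp3 it points to."""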
    #http://www.ximalaya.com/tracks/5500449.json
    #print "in dlone"
    base = "http://www.ximalaya.com/tracks/"
    url = base + str(id) + ".json"
    #print url
    #fd=urllib2.urlopen(url)
    filename = "./json/" + str(id) + ".json"
    if not os.path.isfile(filename):
        #cmd="wget "+url
        #os.system(cmd)
        dljson({"filename": filename, "url": url})
    fd = open(filename)
    json_paser(fd.read())
def json_paser(content):
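    """Parse a track's json metadata and download the 64 kbps mp3 it points to."""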
#print "in sjon_paser" | |
import json | |
myjson=json.loads(content) | |
#print myjson["title"] | |
#print myjson["play_path_64"] | |
title= myjson["title"]+".mp3" | |
mp3=myjson["play_path_64"] | |
if "mp3" not in mp3: | |
print "url is werid:",mp3 | |
#http://101.4.136.34:9999/fdfs.xmcdn.com/group6/M05/36/CF/wKgDg1TgOM3QGSfLATJ1WHmo2b0255.mp3 | |
base="http://101.4.136.34:9999/fdfs.xmcdn.com/" | |
url=base+mp3 | |
#print url | |
import os | |
#cmd="wget "+ url+ " -O "+title+".mp3" | |
#os.system(cmd) | |
dlonemp3({"filename":title,"url":url}) | |
def dllist(idlist):
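    """Download every track in idlist, creating the ./json cache dir if needed."""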
    if not os.path.isdir("./json"):
        os.mkdir("json")
    for id in idlist:
        dlone(id)
def dlonemp3(mp3dict):  # download one mp3 audio file
    filename = mp3dict["filename"]
    url = mp3dict["url"]
    if not os.path.isfile(filename):  # only if it has not been downloaded yet
        print "downloading", filename
        dlafile(filename, url)
    else:
        print "already downloaded this file:" + filename
def dljson(jsondict):  # download one track's json metadata file
    filename = jsondict["filename"]
    url = jsondict["url"]
    if not os.path.isfile(filename):  # only if it has not been downloaded yet
        print "downloading", filename
        dlafile(filename, url, 1024)
    else:
        print "already downloaded this file:" + filename
def dlafile(filename, url, size=1024*1024*10):
    """
    Generic helper: given a filename and a url, download the file in chunks
    of `size` bytes, printing a rough progress percentage as it goes.
    """
    #print url
    CHUNK = size
    req = urlopen(url)
    size = int(req.info()["Content-Length"])
    ALL = size / CHUNK + 1
    now = 0
    #print "downloading " + filename + " [" + str(size/1000) + "KiB]"
    with open(filename, "wb") as fp:
        while True:
            condition = int(float(now) * 100 / ALL)
            if condition > 100:
                condition = 100
            print "\033[91m\r[%" + str(condition) + "]\033[0m",
            sys.stdout.flush()
            chunk = req.read(CHUNK)
            if not chunk:
                break
            fp.write(chunk)
            now += 1
    print "\n"
def dlall(url):
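    """Download every track found on one album page."""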
    idlist = get_ids(url)
    dllist(idlist)
def dlpages(urlfirst, start, end):
    """
    function: download many pages in one run
    usage: python xmly.py album_url start_number end_number
    """
    for i in range(start, end + 1):
        realurl = urlfirst + "?page=" + str(i)
        print "page", i, "is downloading..."
        dlall(str(realurl))
if __name__ == "__main__":
    if len(sys.argv) == 2:    # album url given on the command line
        url = str(sys.argv[1])
    elif len(sys.argv) == 4:  # download a range of pages from the command line
        urlfirst = str(sys.argv[1])
        start = int(sys.argv[2])
        end = int(sys.argv[3])
        dlpages(urlfirst, start, end)
        print "downloading pages finished"
        sys.exit(0)
    else:                     # prompt for the url after starting the program
        url = raw_input("please input the album url> ")
    dlall(str(url))