Skip to content

Instantly share code, notes, and snippets.

@nobodyzzz
Created November 5, 2012 04:56
Show Gist options
  • Select an option

  • Save nobodyzzz/4015423 to your computer and use it in GitHub Desktop.

Select an option

Save nobodyzzz/4015423 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# muzebra.com/vk.com parser & downloader
import os
import requests
from sys import argv, exit
from bs4 import BeautifulSoup
# settings
# playlist lines that begin with this prefix are treated as comments and skipped
ignore = "//#"
# VK API access token, sent as `access_token` in the audio.getById request
# (borrowed from muzebra's website; sorry about that).
# NOTE(review): this is a credential hard-coded in source -- consider loading
# it from the environment or a config file instead.
vk_token = '24eaa09623f4a36c23f4a36c5723da9451223f423f5a3642379de253423a365'
# example of the expected query format and of a matching search URL:
#query = 'Artist - Title'
#url = 'http://muzebra.com/search/?q=Artist+-+Title'
# alternative: take the query from the command-line arguments
#query = ' '.join(sys.argv[1:])
# NOTE(review): not referenced anywhere else in the visible code
not_found = []
def _json_payload(response):
    """Return the decoded JSON body of a requests response, or None.

    Old requests releases expose ``json`` as a property, newer ones as a
    method; both are accepted. A body that is not valid JSON yields None.
    """
    payload = response.json
    if callable(payload):
        try:
            payload = payload()
        except ValueError:
            return None
    return payload


def _find_track_nodes(query, headers, attempts=5):
    """Fetch the muzebra search page and return its track ``<li>`` nodes.

    The page is requested up to *attempts* times; a page without the
    ``<ul class="playlist">`` element is treated as a temporary error.
    Returns an empty list when no attempt produced a playlist.
    """
    for _ in range(attempts):
        # let requests build the query string so '&', '#', '+' etc. in the
        # query are escaped instead of corrupting the URL
        page = requests.get('http://muzebra.com/search/',
                            params={'q': query}, headers=headers).text
        playlist = BeautifulSoup(page).find('ul', class_='playlist')
        if playlist is not None:
            return playlist.find_all('li', class_='track')
    return []


def _prompt_for_track(tracks):
    """Ask the user for a 1-based track number until a valid one is given."""
    try:
        read_line = raw_input
    except NameError:
        # Python 3 has no raw_input
        read_line = input
    while True:
        choice = read_line('Select a track to download: ')
        try:
            index = int(choice)
        except ValueError:
            index = 0
        # reject 0 and negatives explicitly: they would otherwise silently
        # pick a track from the end of the list
        if 1 <= index <= len(tracks):
            return tracks[index - 1]
        print('please enter a number between 1 and %d' % len(tracks))


def download(query):
    """Search muzebra for *query* and save the best match as an mp3.

    *query* is expected in the form ``'Artist - Title'``. Search results are
    inspected in order; each one is listed on stdout. A result is "ignored"
    when its bitrate is below 192 kbps or its ``artist - name`` tag differs
    from the query (case-insensitively, an extra leading ``'the '`` is
    tolerated). Inspection stops at the first result that is not ignored.
    If every listed result was ignored and there is more than one, the user
    is asked to pick; otherwise the first listed result is downloaded.

    The file is written to ``<folder>/<artist> - <name>.mp3`` where
    ``folder`` is the module-level variable set by the script entry point.
    A query with no results is appended to ``muzebra.log``.

    Returns False when nothing was found, the VK API reply was unusable, or
    the downloaded file stayed under 100 KB after one retry; returns None
    after a successful download.
    """
    # a (probably ineffective) way to prevent banning
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1'
    }
    min_bitrate = 192
    min_size = 100000

    # tags scraped from the page are text; decode a byte-string query so the
    # comparison below also works for non-ASCII titles
    if isinstance(query, bytes):
        wanted = query.decode('utf-8', 'replace').lower()
    else:
        wanted = query.lower()

    tracks = []         # every track listed so far
    ignored_count = 0   # how many of them failed the bitrate/tag checks

    for node in _find_track_nodes(query, headers):
        # meta
        artist = node.find(class_='artist').get_text()
        name = node.find(class_='name').get_text()
        duration = node.find(class_='time').get_text().strip()
        info = node.find('a', class_='info')
        vk_audio_id = info['data-aid']   # vkontakte audio id
        muzebra_id = info['data-link']   # muzebra id

        # grab bitrate & file size
        meta = _json_payload(requests.post('http://muzebra.com/service/bitrate',
                                           data={'id': muzebra_id},
                                           headers=headers))
        try:
            bitrate = meta['bitrate']
            size = meta['size']
            kbps = int(bitrate)
        except (TypeError, KeyError, ValueError):
            # one unusable bitrate reply should not abort the whole run
            print('skipping... no bitrate info: %s - %s' % (artist, name))
            continue

        # a track is ignored for a low bitrate or for tags that do not
        # match the query
        title = ('%s - %s' % (artist, name)).lower()
        ignored = kbps < min_bitrate or title not in (wanted, 'the ' + wanted)
        if ignored:
            ignored_count += 1

        # the download link comes directly from VK (muzebra's own media host
        # needs at least a referer in the http headers)
        url_vk = 'https://api.vk.com/method/audio.getById.json?access_token=%s&audios=%s' % (vk_token, vk_audio_id)
        reply = _json_payload(requests.get(url_vk, headers=headers))
        # covers a non-JSON body, an error reply and an empty result list
        if not isinstance(reply, dict) or not reply.get('response'):
            print('vk api error :(')
            return False
        url_download = reply['response'][0]['url']

        tracks.append({
            'artist': artist,
            'name': name,
            'time': duration,
            'aid': vk_audio_id,
            'id': muzebra_id,
            'bitrate': bitrate,
            'size': size,
            'url': url_download
        })
        print("[%d] \t%s %s kbps, %s\t%s - %s" % (len(tracks), duration, bitrate, size, artist, name))

        # a track that passed the checks is good enough; stop looking
        if not ignored:
            break

    if not tracks:
        # no tracks found
        with open("muzebra.log", "a") as log:
            log.write("not found: %s\n" % query)
        return False

    if ignored_count == len(tracks) and len(tracks) > 1:
        # nothing passed the checks: choose manually
        chosen = _prompt_for_track(tracks)
    else:
        chosen = tracks[0]

    # download to a file
    filename = "%s - %s.mp3" % (chosen['artist'], chosen['name'])
    filename = filename.replace('/', '_')
    filepath = os.path.join(folder, filename)

    content = requests.get(chosen['url']).content
    if len(content) < min_size:
        # a tiny body is most likely not the audio file; retry once
        print('warning: file size < 100KB. retrying... ')
        content = requests.get(chosen['url']).content
        if len(content) < min_size:
            print('failed :( len=%d, url=%s' % (len(content), chosen['url']))
            return False
        print('success! downloading the file.')

    with open(filepath, "wb") as out:
        out.write(content)
# Script entry point: download every query listed in a playlist file.
# Usage: <script> <playlist> [folder to save]
if __name__ == "__main__":
    if len(argv) < 2:
        exit("Usage: %s <playlist> [folder to save]" % argv[0])
    playlist = os.path.abspath(argv[1])
    # `folder` is read by download() as the destination directory;
    # it defaults to the current working directory
    folder = os.path.abspath(argv[2] if len(argv) > 2 else os.getcwd())
    if not os.path.exists(folder):
        os.makedirs(folder)
    # read the playlist up front so the file handle is closed before the
    # slow network work starts
    with open(playlist, 'r') as playlist_file:
        lines = playlist_file.readlines()
    for n, line in enumerate(lines):
        line = line.strip()
        # skip blank lines and lines marked with the comment prefix
        if not line or line.startswith(ignore):
            continue
        print('\n#%d\t%s' % (n, line))
        download(line)
@derVedro
Copy link

код с душнячком.

@Denisiuk
Copy link

Denisiuk commented Jun 6, 2014

А почему с душнячком?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment