Created
November 5, 2012 04:56
-
-
Save nobodyzzz/4015423 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # muzebra.com/vk.com parser & downloader | |
| import os | |
| import requests | |
| from sys import argv, exit | |
| from bs4 import BeautifulSoup | |
# settings

# Playlist lines that begin with this prefix are skipped by the script's
# main loop instead of being searched for.
ignore = "//#"

# vk api token (gracefully taken from muzebra's website; sorry about that)
# NOTE(security): a credential is embedded in the source.  Set the VK_TOKEN
# environment variable to use a different token without editing this file;
# the literal below is only the fallback.
vk_token = os.environ.get(
    'VK_TOKEN',
    '24eaa09623f4a36c23f4a36c5723da9451223f423f5a3642379de253423a365',
)

# A search query has the form 'Artist - Title'; queries are read one per
# line from the playlist file given on the command line.

# NOTE(review): not referenced by the code visible in this file; kept
# because other code may rely on the name.
not_found = []
# Tracks encoded below this bitrate (kbps) are treated as low quality.
_MIN_BITRATE = 192
# A downloaded body smaller than this many bytes is assumed to be broken.
_MIN_FILE_SIZE = 100000
# How many times the search page is requested before giving up.
_SEARCH_ATTEMPTS = 5


def _json_body(response):
    """Return the JSON payload of a requests response as a dict.

    ``Response.json`` is a property in old requests releases and a method
    in newer ones; both forms are accepted.  A missing, undecodable or
    non-object payload yields an empty dict.
    """
    payload = response.json
    if callable(payload):
        try:
            payload = payload()
        except ValueError:
            payload = None
    return payload if isinstance(payload, dict) else {}


def _find_track_nodes(query, headers):
    """Return the track ``<li>`` nodes of the muzebra search page for *query*.

    The page is requested up to ``_SEARCH_ATTEMPTS`` times because the
    playlist element is sometimes missing from the response.  An empty list
    is returned when it never shows up.
    """
    for _ in range(_SEARCH_ATTEMPTS):
        # ``params`` lets requests URL-encode the query (spaces, '&', '#').
        r = requests.get('http://muzebra.com/search/',
                         params={'q': query}, headers=headers)
        playlist = BeautifulSoup(r.text).find('ul', class_='playlist')
        if playlist is not None:
            return playlist.find_all('li', class_='track')
        # temporary error, try again
    return []


def _resolve_vk_url(audio_id, headers):
    """Return the direct file URL VK reports for *audio_id*, or None."""
    url_vk = 'https://api.vk.com/method/audio.getById.json?access_token=%s&audios=%s' % (vk_token, audio_id)
    reply = _json_body(requests.get(url_vk, headers=headers))
    try:
        return reply['response'][0]['url']
    except (KeyError, IndexError, TypeError):
        # no 'response' key, an empty list, or a null/malformed entry
        return None


def _prompt_for_track(tracks):
    """Ask the user for a 1-based listing number until a valid one is given."""
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    while True:
        choice = read_line('Select a track to download: ')
        try:
            index = int(choice) - 1
        except ValueError:
            continue
        # reject 0 and negatives, which would silently index from the end
        if 0 <= index < len(tracks):
            return tracks[index]


def _fetch_audio(url):
    """Download *url*, retrying once if the body is suspiciously small.

    Returns the body bytes, or None when both attempts come back smaller
    than ``_MIN_FILE_SIZE``.
    """
    import sys

    body = requests.get(url).content
    if len(body) >= _MIN_FILE_SIZE:
        return body

    # keep the outcome of the retry on the same output line
    sys.stdout.write('warning: file size < 100KB. retrying... ')
    sys.stdout.flush()
    body = requests.get(url).content
    if len(body) < _MIN_FILE_SIZE:
        print('failed :( len=%d, url=%s' % (len(body), url))
        return None
    print('success! downloading the file.')
    return body


def download(query):
    """Search muzebra for *query* and save the best match as an mp3.

    Search results are examined in page order.  A result is rejected when
    its bitrate is below ``_MIN_BITRATE`` or its 'artist - name' differs
    from *query* (case-insensitively, with an optional leading 'the ').
    The first result that is not rejected is downloaded.  If every result
    was rejected, the user is asked to pick one, unless there is only a
    single result, which is then downloaded as is.

    The file is written to the module-level ``folder`` (set by the script's
    ``__main__`` section) as '<artist> - <name>.mp3'.  Queries that yield
    no results are appended to 'muzebra.log' in the working directory.

    Args:
        query: search string, expected in the form 'Artist - Title'.

    Returns:
        False when nothing was found, the VK API did not return a URL, or
        the downloaded file stayed too small; None after a successful save.
    """
    # a (probably ineffective) way to prevent banning
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1'
    }
    url_bitrate = 'http://muzebra.com/service/bitrate'
    wanted = query.lower()

    # every track examined so far, rejected ones included
    tracks = []
    accepted = False

    for node in _find_track_nodes(query, headers):
        # meta
        artist = node.find(class_='artist').get_text()
        name = node.find(class_='name').get_text()
        duration = node.find(class_='time').get_text().strip()
        info = node.find('a', class_='info')
        dataaid = info['data-aid']  # vkontakte audio id
        dataid = info['data-link']  # muzebra id

        # grab bitrate & file size
        meta = _json_body(
            requests.post(url_bitrate, data={'id': dataid}, headers=headers))
        bitrate = meta.get('bitrate')
        size = meta.get('size')
        try:
            kbps = int(bitrate)
        except (TypeError, ValueError):
            # an unknown bitrate is treated as low quality
            kbps = 0

        # reject low-bitrate files and files with non-matching tags
        title = ('%s - %s' % (artist, name)).lower()
        ignored = (kbps < _MIN_BITRATE
                   or title not in (wanted, 'the ' + wanted))

        # directly from VK (muzebra's own links need a referer header)
        url_download = _resolve_vk_url(dataaid, headers)
        if url_download is None:
            print('vk api error :(')
            return False

        tracks.append({
            'artist': artist,
            'name': name,
            'time': duration,
            'aid': dataaid,
            'id': dataid,
            'bitrate': bitrate,
            'size': size,
            'url': url_download,
        })
        print("[%d] \t%s %s kbps, %s\t%s - %s" % (len(tracks), duration, bitrate, size, artist, name))

        # an acceptable track ends the search
        if not ignored:
            accepted = True
            break

    if not tracks:
        # no tracks found
        with open("muzebra.log", "a") as log:
            log.write("not found: %s\n" % query)
        return False

    if accepted:
        # the loop stops right after appending the accepted track
        chosen = tracks[-1]
    elif len(tracks) > 1:
        # everything was rejected: choose manually
        chosen = _prompt_for_track(tracks)
    else:
        chosen = tracks[0]

    # download to a file
    filename = "%s - %s.mp3" % (chosen['artist'], chosen['name'])
    filename = filename.replace('/', '_')
    filepath = os.path.join(folder, filename)

    body = _fetch_audio(chosen['url'])
    if body is None:
        return False
    with open(filepath, "wb") as out:
        out.write(body)
# Script entry point: download every query listed in a playlist file.
# Usage: <script> <playlist> [folder to save]
if __name__ == "__main__":
    if len(argv) < 2:
        exit("Usage: %s <playlist> [folder to save]" % argv[0])

    playlist = os.path.abspath(argv[1])
    # download() reads this module-level name to decide where to save files
    folder = os.path.abspath(argv[2] if len(argv) > 2 else os.getcwd())
    if not os.path.exists(folder):
        os.makedirs(folder)

    # the with-block closes the playlist file even if a download raises
    with open(playlist, 'r') as playlist_file:
        for n, line in enumerate(playlist_file):
            line = line.strip()
            # skip blank lines and lines marked with the ignore prefix
            if not line or line.startswith(ignore):
                continue
            print('\n#%d\t%s' % (n, line))
            download(line)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
код с душнячком.