nobodyzzz · November 5, 2012 04:56 · derVedro · Nov 11, 2013 · Denisiuk · Jun 6, 2014
diff --git a/muzebra.py b/muzebra.py
 #!/usr/bin/env python
 # muzebra.com/vk.com parser & downloader

 import os
 import requests
 from sys import argv, exit
 from bs4 import BeautifulSoup

 # settings
 # playlist lines that start with this prefix are skipped by the main loop
 ignore = "//#"

 # vk api token (gracefully taken from muzebra's website; sorry about that)
 # interpolated into the VK audio.getById request URL inside download()
 # NOTE(review): this is a credential hard-coded in source; consider reading
 # it from the environment or a config file instead
 vk_token = '24eaa09623f4a36c23f4a36c5723da9451223f423f5a3642379de253423a365'


 # leftovers from an earlier single-query version, kept for reference:
 #query = 'Artist - Title'
 #url = 'http://muzebra.com/search/?q=Artist+-+Title'

 # input from command arguments
 #query = ' '.join(sys.argv[1:])

 # NOTE(review): nothing in the visible code appends to or reads this list;
 # failed queries are written to muzebra.log instead
 not_found = []


 def _json_payload(response):
    """Return the decoded JSON body of a requests response.

    Depending on the installed requests release, ``response.json`` is either
    a property holding the decoded data or a method returning it; both forms
    are accepted here.
    """
    payload = response.json
    return payload() if callable(payload) else payload


 def download(query):
    """Search muzebra.com for *query* and save one matching track as an mp3.

    The search results are walked in order. For every track the bitrate and
    size are looked up on muzebra and the download URL is resolved through
    the VK API. The walk stops at the first track that has at least 192 kbps
    and whose "artist - name" equals the query (optionally prefixed with
    "the "), compared case-insensitively; that track is downloaded. If no
    track qualifies, a single candidate is downloaded as is, and several
    candidates are offered for manual selection.

    The file is written into the module-level ``folder`` (assigned by the
    ``__main__`` block) as "<artist> - <name>.mp3" with "/" replaced by "_".

    query -- search string, expected in "Artist - Title" form.

    Returns False when nothing was found (the query is appended to
    muzebra.log), when the VK API gives no usable response, or when the
    downloaded body is still under 100000 bytes after one retry. Returns
    None after a successful download.
    """
    # a (probably ineffective) way to prevent banning
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1'
    }

    # every track listed on the way to an acceptable one
    tracks = []
    # first track that passed the IGNORE checks, if any
    accepted = None

    # stays empty when every attempt fails, so the "not found" branch below
    # handles it instead of a NameError
    soup_tracks = []
    for attempt in range(5):
        # params= lets requests URL-encode the query ('&', '#', spaces, ...)
        r = requests.get('http://muzebra.com/search/', params={'q': query},
                         headers=headers)
        playlist = BeautifulSoup(r.text).find('ul', class_='playlist')
        if playlist is not None:
            soup_tracks = playlist.find_all('li', class_='track')
            break
        # no playlist in the page: treated as a temporary error, try again

    wanted = query.lower()
    for track in soup_tracks:
        # meta
        artist = track.find(class_='artist').get_text()
        name = track.find(class_='name').get_text()
        duration = track.find(class_='time').get_text().strip()

        info = track.find('a', class_='info')
        dataaid = info['data-aid']  # vkontakte audio id
        dataid = info['data-link']  # muzebra id

        # grab bitrate & file size
        r = requests.post('http://muzebra.com/service/bitrate',
                          data={'id': dataid}, headers=headers)
        details = _json_payload(r)
        bitrate = details['bitrate']
        size = details['size']

        # IGNORE: files with <192 kbps, or whose tags match neither the
        # query nor the query prefixed with "the "
        tagged_as = ('%s - %s' % (artist, name)).lower()
        ignored = (int(bitrate) < 192 or
                   tagged_as not in (wanted, 'the ' + wanted))

        # download link directly from VK
        url_vk = 'https://api.vk.com/method/audio.getById.json?access_token=%s&audios=%s' % (vk_token, dataaid)
        r = requests.get(url_vk, headers=headers)
        found = (_json_payload(r) or {}).get('response')
        if not found:
            # also covers an empty/null "response", which would otherwise
            # fail on the [0] lookup below
            print 'vk api error :('
            return False
        url_download = found[0]['url']

        # append to tracks list
        data = {
            'artist': artist,
            'name': name,
            'time': duration,
            'aid': dataaid,
            'id': dataid,
            'bitrate': bitrate,
            'size': size,
            'url': url_download
        }
        tracks.append(data)

        print "[%d] \t%s  %s kbps, %s\t%s - %s" % (len(tracks), duration, bitrate, size, artist, name)

        # an acceptable track ends the search
        if not ignored:
            accepted = data
            break

    if accepted is not None:
        # take the track that passed the checks, not merely the first listed
        chosen = accepted
    elif not tracks:
        # no tracks found
        with open("muzebra.log", "a") as log:
            log.write("not found: %s\n" % query)
        return False
    elif len(tracks) == 1:
        # a single (ignored) candidate is downloaded anyway
        chosen = tracks[0]
    else:
        # every candidate was ignored: choose manually, re-asking until the
        # answer is a valid 1-based index
        chosen = None
        while chosen is None:
            choice = raw_input('Select a track to download: ')
            try:
                number = int(choice)
            except ValueError:
                number = 0
            if 1 <= number <= len(tracks):
                chosen = tracks[number - 1]
            else:
                print 'please enter a number between 1 and %d' % len(tracks)

    # download to a file
    filename = "%s - %s.mp3" % (chosen['artist'], chosen['name'])
    filename = filename.replace('/', '_')
    filepath = os.path.join(folder, filename)

    r = requests.get(chosen['url'])
    if len(r.content) < 100000:
        # presumably such a small body is an error page rather than audio;
        # fetch once more before giving up
        print 'warning: file size < 100KB. retrying... ',
        r = requests.get(chosen['url'])
        if len(r.content) < 100000:
            print 'failed :( len=%d, url=%s' % (len(r.content), chosen['url'])
            return False
        print 'success! downloading the file.'

    with open(filepath, "wb") as out:
        out.write(r.content)


 if __name__ == "__main__":
    if len(argv) < 2:
      exit("Usage: %s <playlist> [folder to save]" % argv[0])

    playlist = os.path.abspath(argv[1])
    folder = os.path.abspath(argv[2] if len(argv) > 2 else os.getcwd())

    if not os.path.exists(folder):
        os.makedirs(folder)

    for n, line in enumerate(open(playlist, 'r').readlines()):
        if line[:len(ignore)] == ignore:
            continue
        line = line.strip()
        print '\n#%d\t%s' % (n, line)
        download(line)
	#!/usr/bin/env python
	# muzebra.com/vk.com parser & downloader

	import os
	import requests
	from sys import argv, exit
	from bs4 import BeautifulSoup

	# settings
	# playlist lines that start with this prefix are skipped by the main loop
	ignore = "//#"

	# vk api token (gracefully taken from muzebra's website; sorry about that)
	# interpolated into the VK audio.getById request URL inside download()
	# NOTE(review): this is a credential hard-coded in source; consider reading
	# it from the environment or a config file instead
	vk_token = '24eaa09623f4a36c23f4a36c5723da9451223f423f5a3642379de253423a365'


	# leftovers from an earlier single-query version, kept for reference:
	#query = 'Artist - Title'
	#url = 'http://muzebra.com/search/?q=Artist+-+Title'

	# input from command arguments
	#query = ' '.join(sys.argv[1:])

	# NOTE(review): nothing in the visible code appends to or reads this list;
	# failed queries are written to muzebra.log instead
	not_found = []


	def download(query):
	url = 'http://muzebra.com/search/?q=%s' % query

	# a (probably ineffective) way to prevent banning
	headers = {
	'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1'
	}

	# a list for every track found
	tracks = []

	# quite moronic way to ignore tracks (see "# IGNORE" block)
	preferred_track = 0

	for attempt in range(5):
	try:
	# get the muzebra's search results page and parse it
	r = requests.get(url, headers=headers)
	page = r.text
	soup = BeautifulSoup(page)
	playlist = soup.find('ul', class_='playlist')
	soup_tracks = playlist.find_all('li', class_='track')
	except AttributeError:
	# temporary error, try again
	#sleep(1)
	pass
	else:
	soup_tracks = playlist.find_all('li', class_='track')
	break

	for track in soup_tracks:
	# meta
	artist = track.find(class_='artist').get_text()
	name = track.find(class_='name').get_text()
	time = track.find(class_='time').get_text().strip()
	#title = track.find(class_='title').get_text()

	info = track.find('a', class_='info')
	dataaid = info['data-aid'] # vkontakte audio id
	dataid = info['data-link'] # muzebra id

	# grab bitrate & file size
	url_bitrate = 'http://muzebra.com/service/bitrate'
	r = requests.post(url_bitrate, data={'id': dataid}, headers=headers)
	bitrate = r.json['bitrate']
	size = r.json['size']

	# IGNORE
	ignored = False
	an_out = '%s - %s' % (artist, name)
	# ignore files with <192 kbps
	if int(bitrate) < 192:
	#print 'skipping... <192 kbps'
	preferred_track += 1
	ignored = True
	# ignore files with incorrect tags
	elif query.lower() != an_out.lower() and \
	('the ' + query.lower() != an_out.lower()):
	#print 'skipping... wrong tags: %s' % an_out
	preferred_track += 1
	ignored = True
	# /IGNORE

	# generate download link
	# needs at least referer in http hearders :(
	#hash = '896792372'
	#url_download = 'http://media.justmuz.com/t/%s_%s/' % (dataid, hash)

	# directly from VK
	url_vk = 'https://api.vk.com/method/audio.getById.json?access_token=%s&audios=%s' % (vk_token, dataaid)

	r = requests.get(url_vk, headers=headers)
	# TODO: check if response is null

	if not 'response' in r.json:
	print 'vk api error :('
	#break
	return False

	url_download = r.json['response'][0]['url']

	# append to tracks list
	data = {
	'artist': artist,
	'name': name,
	'time': time,
	'aid': dataaid,
	'id': dataid,
	'bitrate': bitrate,
	'size': size,
	'url': url_download
	}

	tracks.append(data)

	print "[%d] \t%s %s kbps, %s\t%s - %s" % (len(tracks), time, bitrate, size, artist, name)

	# if track's not skipped earlier, we have no need to find any more
	if ignored == False:
	break

	if preferred_track == len(tracks) and len(tracks) > 1:
	# choose manually
	choice = raw_input('Select a track to download: ')
	chosen = tracks[int(choice) - 1]
	elif len(tracks) == 0:
	# no tracks found
	with open("muzebra.log", "a") as log:
	log.write("not found: %s\n" % query)
	return False
	else:
	chosen = tracks[0]

	# download to a file
	filename = "%s - %s.mp3" % (chosen['artist'], chosen['name'])
	filename = filename.replace('/', '_')
	filepath = "%s/%s" % (folder, filename)

	r = requests.get(chosen['url'])

	if len(r.content) < 100000:
	print 'warning: file size < 100KB. retrying... ',

	# i'm going to hell for this chunk of code
	##########################################
	r = requests.get(chosen['url'])
	if len(r.content) < 100000:
	print 'failed :( len=%d, url=' % (len(r.content), chosen['url'])
	#break
	return False
	else:
	print 'success! downloading the file.'
	with open(filepath, "wb") as code:
	code.write(r.content)
	##########################################
	else:
	with open(filepath, "wb") as code:
	code.write(r.content)


	if __name__ == "__main__":
	if len(argv) < 2:
	exit("Usage: %s <playlist> [folder to save]" % argv[0])

	playlist = os.path.abspath(argv[1])
	folder = os.path.abspath(argv[2] if len(argv) > 2 else os.getcwd())

	if not os.path.exists(folder):
	os.makedirs(folder)

	for n, line in enumerate(open(playlist, 'r').readlines()):
	if line[:len(ignore)] == ignore:
	continue
	line = line.strip()
	print '\n#%d\t%s' % (n, line)
	download(line)
No results found