Last active
September 12, 2016 13:40
-
-
Save phpdude/a662b601d4f49fe068c3282048492d18 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import requests | |
import re | |
from json import loads | |
from HTMLParser import HTMLParser | |
import shutil | |
from lxml.html import fromstring | |
from time import sleep | |
# pip install awesome-slugify | |
from slugify import slugify | |
# You can install it by executing (you have to execute both): | |
# > brew install taglib
# > pip install pytaglib | |
import taglib | |
# Directory (relative to the current working directory) that receives the
# downloaded mp3 files; it is created on demand by the download loop.
OUTPUT_DIR = 'songs'
def chunks(l, n):
    """Lazily split ``l`` into consecutive slices of at most ``n`` items.

    ``l`` must support ``len()`` and slicing. Every slice has exactly ``n``
    items except possibly the last one, which holds the remainder.
    """
    size = len(l)
    for begin in range(0, size, n):
        end = begin + n
        yield l[begin:end]
# Tune the shared slugify instance that later builds the output file names
# (awesome-slugify settings: characters to keep, and the word separator).
slugify.safe_chars = '-'
slugify.separator = ' '

# The vk.com cookie string is read from the VK_COOKIE environment variable;
# without it the script stops here.
cookie = os.environ.get('VK_COOKIE', '')
if not cookie:
    print("Utility requires environment variable VK_COOKIE='...' with your vk.com cookies")
    # sys.exit() rather than the bare exit(): the latter is injected by the
    # `site` module for interactive use and is missing under `python -S`.
    sys.exit(-1)
# One HTTP session is reused for every request in this script. The headers
# imitate a desktop Firefox issuing an XMLHttpRequest, and the user's cookie
# string from VK_COOKIE is sent verbatim in the Cookie header.
# requests.Session() replaces the deprecated requests.session() alias.
s = requests.Session()
s.headers.update({
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:48.0) Gecko/20100101 Firefox/48.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
    'Content-Type': 'application/x-www-form-urlencoded',
    'X-Requested-With': 'XMLHttpRequest',
    'Cookie': cookie,
    'Connection': 'keep-alive',
})
# Accumulates the per-song records returned by vk.com; the download loop
# reads indexes 2, 3 and 4 of every record.
songs = []

# source.html is expected next to the script (presumably a saved copy of the
# vk.com audio page -- confirm). Each ".audio_row" element carries the song
# id in its "data-full-id" attribute. The file is closed as soon as it has
# been parsed.
with open('source.html') as source_file:
    rows = fromstring(source_file.read()).cssselect('.audio_row')

# Progress line: the header first, then one dot per successful batch.
sys.stdout.write('Requesting songs information ...')
sys.stdout.flush()

full_ids = [row.attrib['data-full-id'] for row in rows]
for ids in chunks(full_ids, 10):
    try:
        reply = s.post(
            'https://vk.com/al_audio.php',
            {'act': 'reload_audio', 'al': '1', 'ids': ",".join(ids)},
        ).text
        # The JSON payload sits between the "<!json>" and "<!>" markers.
        payload = re.search(r'<!json>(.*?)<!>', reply)
        if payload is None:
            # Report the real problem instead of an AttributeError on None.
            raise ValueError('no <!json> payload found in response')
        songs += loads(payload.group(1))
        sys.stdout.write('.')
        sys.stdout.flush()
        # Pause between successful batches.
        sleep(1)
    except Exception as e:
        # A failed batch is reported and skipped; the remaining batches are
        # still requested. Start on a fresh line so the message is not glued
        # to the progress dots.
        sys.stdout.write('\n')
        print('ERROR: %s. Requested ids: %s' % (e, ",".join(ids)))

# Terminate the progress line so later output starts on its own line.
sys.stdout.write('\n')
sys.stdout.flush()
def _fill_missing_tags(path, artist, title):
    """Write ALBUM/ARTIST/TITLE into the file at *path* where absent or empty.

    Tags that already hold a value are left alone, and the file is only
    saved when at least one tag was added. The ALBUM fallback is the part of
    *artist* after its last '-'.
    """
    audio = taglib.File(path)
    fallbacks = (
        ('ALBUM', artist.split('-')[-1]),
        ('ARTIST', artist),
        ('TITLE', title),
    )
    changed = False
    for tag, value in fallbacks:
        if not audio.tags.get(tag):
            audio.tags[tag] = [value]
            changed = True
    if changed:
        audio.save()


def _discard_partial(path):
    """Delete *path* if it exists.

    The download loop skips files that already exist, so a partially written
    file must not be left behind after a failure.
    """
    if os.path.isfile(path):
        os.unlink(path)


# Prefix that lines up detail messages under the "#i/N" counter column.
offset = ' ' * 9 + '> '
unescape = HTMLParser().unescape

for i, song in enumerate(songs, 1):
    print(('#%s/%s' % (i, len(songs))).ljust(9, ' ') + 'Processing file ')

    # Record layout as used here: [2] download URL, [3] title, [4] artist.
    # Title and artist arrive HTML-escaped.
    mp3_url, mp3_title, mp3_artist = song[2], unescape(song[3]), unescape(song[4])
    filename = slugify(mp3_artist + ' - ' + mp3_title) + '.mp3'
    output = os.path.join(OUTPUT_DIR, filename)

    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    if os.path.isfile(output):
        print(offset + 'Output "%s" file already exists' % output)
        continue

    print(offset + 'Downloading mp3 from "%s" to "%s" ...' % (mp3_url, output))
    try:
        mp3 = s.get(mp3_url, stream=True)
        try:
            if mp3.status_code == 200:
                with open(output, 'wb') as f:
                    # Let urllib3 undo any Content-Encoding while streaming.
                    mp3.raw.decode_content = True
                    shutil.copyfileobj(mp3.raw, f)
                _fill_missing_tags(output, mp3_artist, mp3_title)
            else:
                print(offset + ' !!! Error downloading %s (response is %s)' % (mp3_url, mp3))
        finally:
            # A streamed response holds its connection until it is fully
            # read or closed; close it so error replies do not leak it.
            mp3.close()
    except (KeyboardInterrupt, SystemExit):
        print(offset + ' !!! Removing output file "%s". Exiting ...' % output)
        _discard_partial(output)
        sys.exit()
    except Exception as e:
        # Report the actual failure (the original printed the literal 'err').
        # repr() keeps the text printable even when the exception message is
        # not plain ASCII.
        print(offset + ' !!! Error has occured "%s". Removing output file "%s"' % (repr(e), output))
        _discard_partial(output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment