Created
December 2, 2011 03:40
-
-
Save rmax/1421630 to your computer and use it in GitHub Desktop.
script to download mp3 files from contenidos.comteco.com.bo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| bash$ python mp3box.py http://contenidos.comteco.com.bo/component/content/article/15-mp3-box/6434-top-40-usa.html | |
| Downloading adele-rolling_in_the_deep.mp3 to /home/rolando/adele-rolling_in_the_deep.mp3 | |
| Downloading blake_shelton-honey_bee.mp3 to /home/rolando/blake_shelton-honey_bee.mp3 | |
| Downloading bruno_mars-grenade.mp3 to /home/rolando/bruno_mars-grenade.mp3 | |
| Downloading bruno_mars-just_the_way_you_are.mp3 to /home/rolando/bruno_mars-just_the_way_you_are.mp3 | |
| ... |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| import re | |
| import optparse | |
| import os | |
| from urllib2 import urlopen | |
| from urlparse import urljoin, urlparse | |
| __author__ = "Rolando Espinoza La fuente <[email protected]>" | |
| __license__ = "Beerware License (free as in 'free beer')" | |
# Help text shown by ``--help`` (optparse re-wraps it when printing).
DESC = """
Fetchs all available mp3 files from the Comteco's mp3player's pages.
See http://contenidos.comteco.com.bo/. Example url:
http://contenidos.comteco.com.bo/component/content/article/15-mp3-box/6434-top-40-usa.html
"""

# Captures the playlist URL from a ``file:'...'`` entry in the article page.
PLAYLIST_RE = re.compile(r"file:'(.+?)'")
# Captures every ``<location>`` value (one per mp3) in the playlist document.
LOCATION_RE = re.compile(r"<location>(.+?)</location>")
# Number of bytes read from the network per write while saving an mp3.
CHUNK_SIZE = 256*1024


def main():
    """Download every mp3 referenced by a Comteco mp3-box article page.

    Command line: ``%prog [-o dir] <url>``.  The article page is fetched,
    the first ``file:'...'`` entry is taken as the playlist URL, and each
    ``<location>`` found in the playlist is saved into the output
    directory (``-o``, defaulting to the current working directory) under
    the basename of its URL path.

    Usage problems (missing or unknown url, unusable output directory,
    playlist not found) are reported through ``parser.error``, which
    terminates the program.
    """
    # Imported here so the file's top-level import block stays untouched.
    from contextlib import closing

    parser = optparse.OptionParser("%prog [-o dir] <url>",
                                   description=DESC)
    parser.add_option('-o', '--output',
                      help="output dir. default current directory")
    opts, args = parser.parse_args()
    if not args:
        parser.error("url required")

    url = args[0]
    if not url.startswith('http://contenidos.comteco.com.bo/'):
        parser.error("unknown url")

    if opts.output:
        outdir = os.path.realpath(opts.output)
    else:
        outdir = os.getcwd()
    # A writable regular file would pass os.access() and only fail later at
    # open(); reject anything that is not an existing directory up front.
    if not os.path.isdir(outdir):
        parser.error(
            "Output dir does not exist or is not a directory: {0}".format(outdir))
    if not os.access(outdir, os.W_OK):
        parser.error("Output dir not writable: {0}".format(outdir))

    # download the article page and locate the playlist reference
    with closing(urlopen(url)) as page_response:
        page = page_response.read()
    match = PLAYLIST_RE.search(page)
    if not match:
        parser.error("Playlist not found")

    # The playlist reference may be relative to the article page; urljoin
    # leaves absolute URLs unchanged.
    playlist_url = urljoin(url, match.group(1))
    with closing(urlopen(playlist_url)) as playlist_response:
        playlist = playlist_response.read()

    # iterate and download each mp3 file
    for location in LOCATION_RE.findall(playlist):
        mp3url = urljoin(url, location.strip())
        filename = os.path.basename(urlparse(mp3url).path)
        if not filename:
            # A path ending in "/" has no basename; opening outdir itself
            # for writing would raise, so skip the entry instead.
            print("Skipping {0}: no file name in url".format(mp3url))
            continue
        dest = os.path.join(outdir, filename)
        print("Downloading {0} to {1}".format(filename, dest))
        # closing() releases the connection even if a write fails midway.
        with closing(urlopen(mp3url)) as mp3_response:
            with open(dest, "wb") as fp:
                # read() returns an empty string at end of stream.
                for chunk in iter(lambda: mp3_response.read(CHUNK_SIZE), b""):
                    fp.write(chunk)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment