Skip to content

Instantly share code, notes, and snippets.

@rmax
Created December 2, 2011 03:40
Show Gist options
  • Select an option

  • Save rmax/1421630 to your computer and use it in GitHub Desktop.

Select an option

Save rmax/1421630 to your computer and use it in GitHub Desktop.
Script to download MP3 files from contenidos.comteco.com.bo.
bash$ python mp3box.py http://contenidos.comteco.com.bo/component/content/article/15-mp3-box/6434-top-40-usa.html
Downloading adele-rolling_in_the_deep.mp3 to /home/rolando/adele-rolling_in_the_deep.mp3
Downloading blake_shelton-honey_bee.mp3 to /home/rolando/blake_shelton-honey_bee.mp3
Downloading bruno_mars-grenade.mp3 to /home/rolando/bruno_mars-grenade.mp3
Downloading bruno_mars-just_the_way_you_are.mp3 to /home/rolando/bruno_mars-just_the_way_you_are.mp3
...
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import optparse
import os
from urllib2 import urlopen
from urlparse import urljoin, urlparse
# Module metadata.
__author__ = "Rolando Espinoza La fuente <[email protected]>"
__license__ = "Beerware License (free as in 'free beer')"
# Description text handed to the option parser in main().
DESC = """
Fetchs all available mp3 files from the Comteco's mp3player's pages.
See http://contenidos.comteco.com.bo/. Example url:
http://contenidos.comteco.com.bo/component/content/article/15-mp3-box/6434-top-40-usa.html
"""
# Captures the playlist URL from a  file:'...'  entry in the page source.
PLAYLIST_RE = re.compile(r"file:'(.+?)'")
# Captures the text of each <location>...</location> element in the playlist.
LOCATION_RE = re.compile(r"<location>(.+?)</location>")
# Number of bytes requested per read while copying an mp3 to disk (256 KiB).
CHUNK_SIZE = 256*1024
def _fetch(url):
    """Return the whole body of *url*, always closing the connection."""
    response = urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _download(url, dest):
    """Stream *url* into the file *dest*, CHUNK_SIZE bytes at a time."""
    response = urlopen(url)
    try:
        with open(dest, "wb") as fp:
            # read() returns an empty string at end of stream, which stops
            # the iteration.
            for chunk in iter(lambda: response.read(CHUNK_SIZE), b""):
                fp.write(chunk)
    finally:
        response.close()


def main():
    """Download every mp3 referenced by a Comteco mp3 player page.

    Command line: ``[-o dir] <url>``.  The page at ``<url>`` is fetched and
    searched with PLAYLIST_RE for the playlist address; every
    ``<location>`` entry found in that playlist (LOCATION_RE) is then
    downloaded into the output directory (``-o``, or the current working
    directory when the option is absent).

    Invalid input is reported through ``parser.error``: a missing url, a url
    outside ``http://contenidos.comteco.com.bo/``, a non-writable output
    directory, or a page without a playlist entry.
    """
    parser = optparse.OptionParser("%prog [-o dir] <url>",
                                   description=DESC)
    parser.add_option('-o', '--output',
                      help="output dir. default current directory")
    opts, args = parser.parse_args()
    if not args:
        parser.error("url required")
    url = args[0]
    if not url.startswith('http://contenidos.comteco.com.bo/'):
        parser.error("unknown url")

    if opts.output:
        outdir = os.path.realpath(opts.output)
    else:
        outdir = os.getcwd()
    if not os.access(outdir, os.W_OK):
        parser.error("Output dir not writable: {0}".format(outdir))

    # Download the page and locate the playlist xml that lists the mp3s.
    match = PLAYLIST_RE.search(_fetch(url))
    if not match:
        parser.error("Playlist not found")
    # Resolve against the page url so a relative playlist path also works;
    # an absolute url is returned unchanged by urljoin.
    playlist_url = urljoin(url, match.group(1))

    # Iterate over the playlist and download each mp3 file.
    for location in LOCATION_RE.findall(_fetch(playlist_url)):
        mp3url = urljoin(url, location)
        filename = os.path.basename(urlparse(mp3url).path)
        if not filename:
            # A path ending in "/" has no basename; opening outdir itself
            # for writing would fail, so skip the entry instead.
            print("Skipping {0}: no file name in url".format(mp3url))
            continue
        dest = os.path.join(outdir, filename)
        print("Downloading {0} to {1}".format(filename, dest))
        _download(mp3url, dest)
# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment