Skip to content

Instantly share code, notes, and snippets.

@henkman
Created November 20, 2015 13:23
Show Gist options
  • Select an option

  • Save henkman/13bb08b9d54014b0dd02 to your computer and use it in GitHub Desktop.

Select an option

Save henkman/13bb08b9d54014b0dd02 to your computer and use it in GitHub Desktop.
import requests
import re
import os
# Listing pages of the "Zwiegespraeche mit" programme archive, pages 0-5.
# Each entry is one paginated index page that gets scanned for episode links.
pages = tuple(
    'http://www.radioeins.de/programm/sendungen/modo1619/zwiegespraeche_mit/index.htm/page=%d.html' % number
    for number in range(6)
)
# The patterns are raw strings so that regex escapes such as ``\[`` and ``\.``
# reach the regex engine verbatim instead of being parsed (and warned about)
# as invalid string escape sequences.

# Anchor whose link text is "[mehr]"; group 1 is its href.
reDlPage = re.compile(r'<a href="([^"]+)" class="[^"]+" title="[^"]+">\[mehr\]</a>')
# Value of a data-media-ref attribute; group 1 is the referenced path.
reDl = re.compile(r'data-media-ref="([^"]+)"')
# Value of the "_stream" key in the media description; group 1 is the URL.
reFile = re.compile(r'"_stream":"([^"]+)"')
# Last path segment of a URL ending in ".html"; group 1 is the segment without
# the extension. Segments that contain an additional dot do not match.
reName = re.compile(r'/([^/\.]+)\.html$')
def download(url, file, timeout=30):
    """Stream the resource at *url* into the local path *file*.

    The body is first written to ``file + '.part'`` and only moved to *file*
    after the whole transfer succeeded, so an interrupted run never leaves a
    truncated file under the final name.

    Args:
        url: Address fetched with an HTTP GET.
        file: Destination path; an existing file is replaced.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``.

    Raises:
        requests.RequestException: The request failed, timed out or the
            server answered with an HTTP error status.
        OSError: The destination could not be written or renamed.
    """
    partial = file + '.part'
    r = requests.get(url, stream=True, timeout=timeout)
    try:
        # Fail before touching the disk instead of saving an error page.
        r.raise_for_status()
        with open(partial, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8 * 1024):
                if chunk:
                    f.write(chunk)
    except BaseException:
        # Also covers Ctrl-C: drop the incomplete data, then re-raise.
        if os.path.exists(partial):
            os.remove(partial)
        raise
    finally:
        # Release the connection even when the transfer was cut short.
        r.close()
    os.replace(partial, file)
# Walk every listing page, follow each "[mehr]" link to the episode page,
# resolve the media description it references and download the stream it
# names as "<episode-name>.mp3" into the current directory.

# Seconds to wait on a single page request before giving up.
REQUEST_TIMEOUT = 30
SITE_ROOT = 'http://www.radioeins.de'

for page in pages:
    try:
        listing = requests.get(page, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print(page + " could not be fetched: " + str(err))
        continue
    for link in reDlPage.finditer(listing.text):
        episode_path = link.group(1)
        name_match = reName.search(episode_path)
        if not name_match:
            continue
        target = name_match.group(1) + ".mp3"
        # Checked before any further request so that re-running the script
        # does not re-fetch the pages of episodes that are already on disk.
        if os.path.exists(target):
            print(target+" already exists. skipping.")
            continue
        try:
            episode = requests.get(SITE_ROOT + episode_path,
                                   timeout=REQUEST_TIMEOUT)
            media_ref = reDl.search(episode.text)
            if not media_ref:
                continue
            media = requests.get(SITE_ROOT + media_ref.group(1),
                                 timeout=REQUEST_TIMEOUT)
            stream = reFile.search(media.text)
            if not stream:
                continue
            download(stream.group(1), target)
        except requests.RequestException as err:
            # One unreachable episode must not abort the remaining ones.
            print(target + " could not be downloaded: " + str(err))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment