Skip to content

Instantly share code, notes, and snippets.

@tripulse
Created October 31, 2019 15:27
Show Gist options
  • Select an option

  • Save tripulse/94a9ed0eee616217412193e5f11766ec to your computer and use it in GitHub Desktop.

Select an option

Save tripulse/94a9ed0eee616217412193e5f11766ec to your computer and use it in GitHub Desktop.
A simple audio file extractor made solely to download the episodes listed in the RSS feed of "The Lunduke Show".
from collections import namedtuple
from urllib.request import urlopen, URLError
from xml.etree import ElementTree
import os
REPLACE_DELIM = '_'
POSIX_ILLEGAL = '/'
WINNT_ILLEGAL = '<>:"/\\|?*'


def to_compat(filename: str, delimeter: str, os_name: "str | None" = None) -> str:
    """
    Return `filename` with every character that is reserved on the
    target OS replaced by `delimeter`.

    filename  -- the proposed file name (a single path component).
    delimeter -- replacement text for each reserved character; it
                 should not itself contain a reserved character. An
                 empty string simply removes the reserved characters.
    os_name   -- platform identifier in the style of `os.name`
                 ("nt", "posix", ...). Defaults to the host's
                 `os.name`.

    "nt" uses the WINNT_ILLEGAL set; every other platform uses the
    POSIX_ILLEGAL set, so the function always returns a string.

    Only the characters listed in the two constants are handled.
    """
    if os_name is None:
        os_name = os.name

    # Anything that is not Windows is treated like POSIX. Previously an
    # unrecognised os.name fell through both branches and returned None.
    illegal = WINNT_ILLEGAL if os_name == "nt" else POSIX_ILLEGAL

    # One translation table handles both platforms in a single pass and
    # also supports multi-character (or empty) delimiters.
    table = str.maketrans(dict.fromkeys(illegal, delimeter))
    return filename.translate(table)
def save_file(filename: str, data: bytes) -> None:
    """
    Save a buffer of octets into a file, creating it or overwriting
    any existing content.

    filename -- destination path; it must already be a legal name for
                the host OS.
    data     -- the raw bytes to store.

    Raises OSError if the file cannot be opened or written.
    """
    # The context manager flushes and closes the handle even when the
    # write fails part-way through.
    with open(filename, 'wb') as dest_file:
        # Write the payload; the previous code passed `filename` here,
        # which raised TypeError on a binary-mode file.
        dest_file.write(data)
# The feed features MP3 audio podcasts (about linux and its surrounding stuff).
# It holds XML data that stores information about every podcast and the
# feed itself. But, we only focus on retrieving audio files.
with urlopen('http://vault.lunduke.com/LundukeShowMP3.xml') as feed_response:
    mp3rss_feed = ElementTree.fromstring(feed_response.read())

# ElementTree refuses prefixed paths such as 'itunes:duration' unless it is
# given a prefix -> URI map (it raises SyntaxError otherwise).
# NOTE(review): assumes the feed declares the standard iTunes podcast
# namespace URI; if it uses another one the duration prints as "unknown".
ITUNES_NS = {'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'}

# Iterate over each podcast and save it to the corresponding
# file, named after the title of the podcast.
for podcast in mp3rss_feed.iter('item'):
    title = podcast.findtext('title')
    link = podcast.findtext('link')

    # Without a title there is no file name, and without a link there is
    # nothing to download; skip the entry instead of crashing on None.
    if not title or not link:
        print("- skipping a feed item without a title or link")
        continue

    dest_file = to_compat(title, REPLACE_DELIM) + '.mp3'
    print(
        "- %(title)s (%(duration)s)"
        " to %(dest_file)s" %
        {
            'duration': podcast.findtext(
                'itunes:duration', default='unknown', namespaces=ITUNES_NS),
            'title': title,
            'dest_file': dest_file
        }
    )

    # Close each HTTP response as soon as its body has been read.
    with urlopen(link.strip()) as episode_response:
        save_file(dest_file, episode_response.read())
@tripulse
Copy link
Copy Markdown
Author

I don't know whether it is compatible with other feeds, so it's up to you to adapt it for your own project.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment