Created
October 31, 2019 15:27
-
-
Save tripulse/94a9ed0eee616217412193e5f11766ec to your computer and use it in GitHub Desktop.
Simple audio file extractor made solely to download the audio files listed in the RSS feed of "The Lunduke Show".
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from collections import namedtuple | |
| from urllib.request import urlopen, URLError | |
| from xml.etree import ElementTree | |
| import os | |
# Characters that are not allowed in a file name, per host platform.
# to_compat() picks one of these sets based on os.name.
WINNT_ILLEGAL = '<>:"/\\|?*'
POSIX_ILLEGAL = '/'

# Replacement text substituted for every illegal character.
REPLACE_DELIM = '_'
def to_compat(filename: str, delimeter: str) -> str:
    """
    Return ``filename`` with the characters reserved by the host OS
    replaced by ``delimeter``.

    On Windows (``os.name == "nt"``) every character listed in
    ``WINNT_ILLEGAL`` is replaced. On every other platform only
    ``POSIX_ILLEGAL`` is replaced.

    :param filename: candidate file name (a single path component).
    :param delimeter: replacement text; it should not itself contain a
        reserved character, since the result is not re-checked.
    :return: the sanitised file name (always a ``str``).
    """
    if os.name == "nt":
        # A translation table handles all reserved characters in one pass.
        return filename.translate(
            {ord(char): delimeter for char in WINNT_ILLEGAL})

    # "posix" and any other os.name value. Previously an unrecognised
    # platform fell through and returned None, which broke callers that
    # concatenate the result with a file extension.
    return filename.replace(POSIX_ILLEGAL, delimeter)
def save_file(filename: str, data: bytes):
    """
    Write a buffer of octets to a file, replacing any existing content.

    :param filename: destination path; it must already be a legal file
        name for the host OS (this function does not sanitise it).
    :param data: the raw bytes to store.
    :raises OSError: if the file cannot be opened or written.
    """
    # The context manager flushes and closes the handle even when the
    # write fails part-way through.
    with open(filename, 'wb') as dest_file:
        # Write the payload; the original mistakenly wrote ``filename``,
        # which raises TypeError on a binary-mode file.
        dest_file.write(data)
# The feed features MP3 audio podcasts (about Linux and related topics).
# It is an XML document describing the feed itself and every episode in
# it; only the audio files are retrieved here.

# ElementTree needs an explicit prefix map to resolve 'itunes:duration';
# without one, find() raises SyntaxError ("prefix not found in prefix map").
# NOTE(review): this is the standard iTunes podcast namespace URI -- confirm
# the feed declares the same one, otherwise the duration prints as '?'.
ITUNES_NS = {'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'}

# Close the HTTP response as soon as the document has been read.
with urlopen('http://vault.lunduke.com/LundukeShowMP3.xml') as feed_response:
    mp3rss_feed = ElementTree.fromstring(feed_response.read())

# Iterate over each podcast and save it to a file named after the
# podcast's title.
for podcast in mp3rss_feed.iter('item'):
    title = podcast.findtext('title')

    # RSS 2.0 carries the media URL in <enclosure url="...">; fall back to
    # <link>, which is what this script originally downloaded.
    enclosure = podcast.find('enclosure')
    audio_url = enclosure.get('url') if enclosure is not None else None
    if not audio_url:
        audio_url = podcast.findtext('link')

    # An item lacking a title or a URL cannot be named or fetched; skip it
    # rather than aborting the whole run with an AttributeError.
    if not title or not audio_url:
        print("- skipping an item without a title or download URL")
        continue

    dest_file = to_compat(title, REPLACE_DELIM) + '.mp3'

    print(
        "- %(title)s (%(duration)s)"
        " to %(dest_file)s" %
        {
            'duration': podcast.findtext(
                'itunes:duration', default='?', namespaces=ITUNES_NS),
            'title': title,
            'dest_file': dest_file
        }
    )

    with urlopen(audio_url) as episode_response:
        save_file(dest_file, episode_response.read())
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I don't know whether it is compatible with other feeds, so it's up to you to adapt it before incorporating it into your project.