-
-
Save joshdick/338af38a2eda382a41180b7865727751 to your computer and use it in GitHub Desktop.
Download all items in a podcast.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import calendar
import datetime
import os
import shutil
import sys
import time
from urllib.parse import urlsplit
from urllib.request import Request, urlopen

import feedparser
# Some hosts (e.g. Patreon) will deny requests with an HTTP 403
# if they appear to come from scripts; pretend to be Firefox.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:72.0) Gecko/20100101 Firefox/72.0'
# From http://stackoverflow.com/a/1160227
if sys.version_info < (3, 3):
    def touch(fname, mode=0o666, dir_fd=None, **kwargs):
        """Create `fname` if it does not exist and set its access/modified times.

        Fallback for interpreters older than 3.3: `dir_fd` is accepted for
        signature compatibility but ignored. Only the `times` keyword
        (an `(atime, mtime)` tuple, or absent for "now") is honoured.
        """
        flags = os.O_CREAT | os.O_APPEND
        times = kwargs.get('times')
        # Opening with O_CREAT creates the file when missing without truncating it.
        with os.fdopen(os.open(fname, flags, mode)):
            os.utime(fname, times)
else:
    def touch(fname, mode=0o666, dir_fd=None, **kwargs):
        """Create `fname` if it does not exist and set its access/modified times.

        Args:
            fname: Path of the file to touch.
            mode: Permission bits used if the file has to be created.
            dir_fd: Optional directory file descriptor that `fname` is
                relative to; forwarded to `os.open` and, when needed,
                to `os.utime`.
            **kwargs: Forwarded to `os.utime` (e.g. `times=(atime, mtime)`
                or `ns=(atime_ns, mtime_ns)`).
        """
        flags = os.O_CREAT | os.O_APPEND
        with os.fdopen(os.open(fname, flags=flags, mode=mode, dir_fd=dir_fd)) as f:
            # Prefer operating on the open descriptor when the platform allows
            # it; otherwise fall back to the path, which then needs `dir_fd`.
            # (`os.supports_fd` is a set, so it must be queried with `in`
            # rather than tested for truthiness.)
            utimeTakesFd = os.utime in os.supports_fd
            os.utime(f.fileno() if utimeTakesFd else fname,
                     dir_fd=None if utimeTakesFd else dir_fd, **kwargs)
# From http://stackoverflow.com/a/7244263
def downloadFile(url, file_name):
    """Download `url` and save the response body locally under `file_name`.

    The request is sent with the module's browser-like User-Agent header.
    An existing `file_name` is overwritten.

    Args:
        url: Address of the resource to fetch.
        file_name: Path of the local file to write.
    """
    req = Request(url)
    req.add_header('User-Agent', USER_AGENT)
    with urlopen(req) as response, open(file_name, 'wb') as out_file:
        # Stream in chunks so a large media file is never held fully in memory.
        shutil.copyfileobj(response, out_file)
def getContentLength(url):
    """Return the Content-Length the server reports for `url`, or 0 if unknown.

    Prints a notice first, because this is used as a fallback when a feed's
    declared enclosure length does not match the file on disk. The response
    is closed without reading its body.

    Args:
        url: Address of the resource to query.

    Returns:
        The value of the `content-length` response header as an int, or 0
        when the header is absent, empty or not a valid integer.
    """
    print("Enclosure length mismatch. Checking content-length explicitly...")
    req = Request(url)
    req.add_header('User-Agent', USER_AGENT)
    with urlopen(req) as response:
        size = response.headers.get("content-length")
    try:
        return int(size) if size else 0
    except ValueError:
        # A malformed header is treated the same as a missing one.
        return 0
def _mediaExtension(mediaURL):
    """Return the extension (without the dot) of the URL's path, or ''."""
    # Parse the path only, so dots inside a query string or fragment
    # (e.g. signed tokens) are never mistaken for the extension.
    return os.path.splitext(urlsplit(mediaURL).path)[1][1:]


def _enclosureSize(media):
    """Return the enclosure's declared length in bytes, or 0 if unknown."""
    try:
        # The declared length arrives as a str and may be missing or empty.
        return int(getattr(media, 'length', None) or 0)
    except (TypeError, ValueError):
        return 0


def downloadAll(feedURL):
    """Download the enclosure of every item in the feed at `feedURL`.

    Items are processed in reverse feed order. An item is skipped unless it
    has exactly one enclosure. The file is named after the item title (with
    '/' replaced by '_') plus the enclosure's extension, and is written to
    the current working directory. A non-empty existing file is kept when
    its size matches the declared enclosure length, when no length is
    declared, or when it matches the server's Content-Length. Otherwise the
    file is (re)downloaded and its modified time is set to the item's
    publication time.

    Args:
        feedURL: URL (or anything `feedparser.parse` accepts) of the feed.
    """
    feed = feedparser.parse(feedURL)
    print("Processing feed %s..." % feed['feed'].get('title', feedURL))
    for post in reversed(feed.entries):
        print("Processing item %s... " % post.title, end="")
        if len(post.enclosures) != 1:
            print("Post has %d enclosures, not 1. Skipping post."
                  % len(post.enclosures))
            continue

        media = post.enclosures[0]
        mediaURL = media.href
        mediaExt = _mediaExtension(mediaURL)
        mediaSize = _enclosureSize(media)
        baseName = post.title.replace('/', '_')
        filename = "%s.%s" % (baseName, mediaExt) if mediaExt else baseName
        stat = os.stat(filename) if os.path.isfile(filename) else None

        if stat and stat.st_size > 0 and (
                stat.st_size == mediaSize
                or mediaSize == 0
                # manually verify enclosure length; it might be wrong
                or getContentLength(mediaURL) == stat.st_size):
            print("File already downloaded. Skipping.")
            continue

        if stat and stat.st_size != mediaSize:
            print("Incorrect file found. Redownloading... ", end="")
        else:
            print("Downloading... ", end="")
        sys.stdout.flush()

        # Download the file...
        downloadFile(mediaURL, filename)
        print("Done.")

        # ... and set its modified time to the publication time.
        # NOTE(review): indentation was lost in the source; this step is
        # assumed to belong to the download path only - confirm.
        published = getattr(post, 'published_parsed', None)
        if published is None:
            continue
        # feedparser normalises parsed dates to UTC, so convert with
        # calendar.timegm (time.mktime would interpret them as local time).
        pubTimestamp = calendar.timegm(published)
        touch(filename, times=(stat.st_atime if stat else pubTimestamp,
                               pubTimestamp))
# Hidden file, in the current working directory, that remembers the feed URL.
_feedURLfilename = ".podcast_source"

if __name__ == "__main__":
    # Remember the feed URL in a hidden file named _feedURLfilename
    # Always use the command-line URL if given and remember it in that file.
    if len(sys.argv) == 2:
        feedURL = sys.argv[1]
        with open(_feedURLfilename, 'w') as feedURLfile:
            feedURLfile.write(feedURL)
    elif len(sys.argv) == 1 and os.path.isfile(_feedURLfilename):
        with open(_feedURLfilename, 'r') as feedURLfile:
            # Strip whitespace so a trailing newline (e.g. from hand-editing
            # the file) does not end up inside the URL.
            feedURL = feedURLfile.read().strip()
    else:
        print("USAGE: %s feedURL" % sys.argv[0])
        sys.exit(1)
    downloadAll(feedURL)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment