Last active
March 4, 2024 09:28
-
-
Save cleverdevil/a8215850420493c1ee06364161e281c0 to your computer and use it in GitHub Desktop.
Fetch recently played episodes from Overcast.fm. Then, publish history to my website.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
You'll need to pip install some dependencies:

* python-dateutil
* requests

Also, create an importable conf.py module that defines EMAIL, PASSWORD,
SESSION_PATH, HISTORY_PATH, and ENDPOINT_URL.
'''
from xml.etree import ElementTree | |
from datetime import datetime | |
from dateutil.tz import UTC | |
from dateutil.parser import parse as parse_dt | |
import conf | |
import re | |
import sys | |
import requests | |
import pickle | |
import os.path | |
import json | |
# Load a previously pickled requests session from conf.SESSION_PATH, or log in
# to Overcast with conf.EMAIL / conf.PASSWORD and save the new session there.
# NOTE(review): unpickling can execute arbitrary code; this is only safe while
# conf.SESSION_PATH is a file that nothing but this script writes to.
if os.path.exists(conf.SESSION_PATH):
    print('Found saved session. Restoring!')
    # context manager so the file handle is closed instead of leaked
    with open(conf.SESSION_PATH, 'rb') as saved_session:
        session = pickle.loads(saved_session.read())
else:
    print('No saved session. Authenticating!')
    session = requests.Session()
    response = session.post('https://overcast.fm/login', data={
        'email': conf.EMAIL,
        'password': conf.PASSWORD
    })

    if response.status_code != 200:
        print('Authentication failed')
        # exit non-zero so a calling shell / cron job can detect the failure
        sys.exit(1)

    print('Authenticated successfully. Saving session.')
    with open(conf.SESSION_PATH, 'wb') as saved_session:
        saved_session.write(pickle.dumps(session))
# Fetch the latest detailed ("extended") OPML export from Overcast using the
# authenticated session; the result is left in `response` for the parser.
print('Fetching latest OPML export from Overcast')
response = session.get('https://overcast.fm/account/export_opml/extended')
if response.status_code != 200:
    print('Failed to fetch OPML. Exiting.')
    print(response.text)
    print(response.headers)
    # exit non-zero so a calling shell / cron job can detect the failure
    sys.exit(1)

# Cache the last OPML file. This is best-effort: a failure here is reported
# but must not stop the run. Only I/O and encoding errors are caught, so that
# KeyboardInterrupt / SystemExit are no longer swallowed.
try:
    with open('/tmp/overcast.opml', 'w', encoding='utf-8') as f:
        f.write(response.text)
except (OSError, UnicodeError) as exc:
    print('Unable to cache OPML file.')
    print(exc)
# Parse the OPML export held in `response`, find episodes that were played
# within the last 5 days, and publish each one (with its artwork) to
# conf.ENDPOINT_URL. A file named after the episode's Overcast id is written
# under conf.HISTORY_PATH after a successful publish so it is not sent twice.

# Patterns used to scrape the episode page. They are compiled once, outside
# the loops, and stop at the first closing quote instead of matching greedily
# to the last quote on the line.
ARTWORK_RE = re.compile(r'img class="art fullart" src="([^"]*)"')
SUMMARY_RE = re.compile(r'meta name="og:description" content="([^"]*)"')

# parse the OPML
try:
    tree = ElementTree.fromstring(response.text)
except ElementTree.ParseError as exc:
    # A non-XML body here may mean the saved session is no longer accepted
    # and Overcast answered with something other than the export.
    print('Overcast did not return a valid OPML document:', exc)
    print('The saved session may have expired. Delete', conf.SESSION_PATH,
          'and run again to re-authenticate.')
    sys.exit(1)

# find all podcasts and their episodes
podcasts = tree.findall(".//*[@type='rss']")

# Current time as a timezone-aware UTC datetime. (Calling astimezone() on the
# naive result of utcnow() would interpret it as local time and shift it by
# the local UTC offset.)
now = datetime.now(UTC)

# look for recently played episodes
for podcast in podcasts:
    pod_title = podcast.attrib['title']

    for episode in podcast:
        # skip unplayed episodes
        if episode.attrib.get('played', '0') != '1':
            continue

        # skip episodes without an activity date; there is nothing to compare
        user_activity_date_raw = episode.attrib.get('userUpdatedDate')
        if not user_activity_date_raw:
            continue

        # skip episodes played over 5 days ago
        user_activity_date = parse_dt(user_activity_date_raw)
        recency = now - user_activity_date
        if recency.days > 5:
            continue

        # parse out the remaining details we care about
        title = episode.attrib['title']
        url = episode.attrib['url']
        overcast_url = episode.attrib['overcastUrl']
        overcast_id = episode.attrib['overcastId']

        # check the publish history before doing any network requests
        footprint = os.path.join(conf.HISTORY_PATH, overcast_id)
        if os.path.exists(footprint):
            print('Skipping already published ep ->', title)
            continue

        # fetch the episode page and scrape the artwork URL from it
        episode_page = session.get(overcast_url)
        results = ARTWORK_RE.findall(episode_page.text)
        if len(results) != 1:
            # without an artwork URL there is no photo to upload, so really
            # skip the episode instead of failing on an empty URL below
            print('Cannot find podcast artwork for this show... skipping...')
            continue
        artwork_url = results[0]

        # scrape the episode summary, falling back to the title
        results = SUMMARY_RE.findall(episode_page.text)
        summary = title
        if len(results) == 1 and results[0]:
            summary = results[0]

        print('Played episode of ', pod_title)
        print(' ->', title)
        print(' ->', summary)
        print(' ->', artwork_url)
        print(' ->', url)
        print(' ->', overcast_url)
        print(' ->', user_activity_date_raw)

        # build payload
        data = {
            'title': title,
            'summary': summary,
            'type': 'podcast',
            'author': pod_title,
            'link': overcast_url,
            'listenDateTime': user_activity_date_raw
        }

        # download the artwork and upload it together with the JSON payload
        poster_response = requests.get(artwork_url)
        response = requests.post(conf.ENDPOINT_URL, data={
            'payload': json.dumps(data)
        }, files={
            'photo': (
                artwork_url.rsplit('/', 1)[1],
                poster_response.content,
                # fall back to a generic type if the server sent none
                poster_response.headers.get(
                    'Content-Type', 'application/octet-stream'
                ),
                {'Expires': '0'}
            )
        })

        if response.status_code in (200, 201, 202):
            # record the publish so the episode is skipped on later runs
            with open(footprint, 'w') as history_file:
                history_file.write(json.dumps(data))
            print('Successfully published!')
            print(response.headers)
        else:
            print('Failed to publish!')
            print(response.status_code)
            print(response.text)
@StrikeZW everything seems to be working fine for me. Have you created a conf.py file with SESSION_PATH, EMAIL, PASSWORD, HISTORY_PATH, and ENDPOINT_URL defined?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello,
Is the extended OPML download broken somehow? I can do it via the browser, but with this Python script I only get a file that is not the XML I was looking for. It seems that the authentication is no longer valid after the session is saved... any hints?