-
-
Save yayadrian/67ea73d5e6635d8a41561f4608053539 to your computer and use it in GitHub Desktop.
Fetch recently played episodes from Overcast.fm, then create a CSV and generate notes in Obsidian from a template.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# conf.py — user-editable settings for the Overcast -> Obsidian exporter.

SESSION_PATH = "./sessions"         # pickled requests.Session is stored at this path
EMAIL = "CHANGE-ME"                 # Overcast.fm account email
PASSWORD = "CHANGE-ME"              # Overcast.fm account password
HISTORY_PATH = "./history"          # per-episode marker files are created in this folder
ENDPOINT_URL = "https://api.example.com/endpoint"
EXPORT_FILENAME = "./playedEpisodes.csv"  # CSV export of the played episodes
MD_TEMPLATE = 'CHANGE-ME'           # path to the Obsidian markdown template
OUTPUT_FOLDER = 'CHANGE-ME'         # path to the Obsidian output folder
NUMBER_DAYS_BACK = 5                # how many days of play history to include
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
https://gist.github.com/cleverdevil/a8215850420493c1ee06364161e281c0 | |
You'll need to pip install some dependencies: | |
* python-dateutil | |
* requests | |
Also, populate your EMAIL and PASSWORD below. | |
''' | |
from xml.etree import ElementTree | |
from datetime import datetime | |
from dateutil.tz import UTC | |
from dateutil.parser import parse as parse_dt | |
import conf | |
import re | |
import sys | |
import requests | |
import pickle | |
import os.path | |
import json | |
import csv | |
# Restore a previously pickled requests.Session, or authenticate against
# Overcast.fm and persist the new session for later runs.
if os.path.exists(conf.SESSION_PATH):
    print('Found saved session. Restoring!')
    # NOTE: unpickling is only safe because this file was written by us;
    # never point SESSION_PATH at untrusted data.
    with open(conf.SESSION_PATH, 'rb') as saved_session:
        session = pickle.load(saved_session)
else:
    print('No saved session. Authenticating!')
    session = requests.Session()
    response = session.post('https://overcast.fm/login', data={
        'email': conf.EMAIL,
        'password': conf.PASSWORD
    })
    if response.status_code != 200:
        print('Authentication failed')
        sys.exit(1)  # non-zero exit so cron/shell callers see the failure
    print('Authenticated successfully. Saving session.')
    with open(conf.SESSION_PATH, 'wb') as saved_session:
        saved_session.write(pickle.dumps(session))
# Fetch the latest detailed OPML export from Overcast and parse it.
print('Fetching latest OPML export from Overcast')
response = session.get('https://overcast.fm/account/export_opml/extended')
if response.status_code != 200:
    print('Failed to fetch OPML. Exiting.')
    print(response.text)
    print(response.headers)
    sys.exit(1)  # non-zero exit so failures are visible to the caller

# Cache the last OPML file for debugging; a cache miss is not fatal.
try:
    with open('/tmp/overcast.opml', 'w') as f:
        f.write(response.text)
except OSError:  # narrow except: only file-system errors are expected here
    print('Unable to cache OPML file.')

# parse the OPML into an element tree
tree = ElementTree.fromstring(response.text)
# find all podcasts and their episodes in the OPML tree
podcasts = tree.findall(".//*[@type='rss']")

# Look for recently played episodes. Use an aware "now": the original
# datetime.utcnow().astimezone(UTC) is a bug — utcnow() is naive, so
# astimezone() interprets it as LOCAL time and skews the window by the
# machine's UTC offset.
now = datetime.now(UTC)

for podcast in podcasts:
    pod_title = podcast.attrib['title']
    for episode in list(podcast):
        # skip unplayed episodes
        if episode.attrib.get('played', '0') != '1':
            continue

        # skip episodes played more than NUMBER_DAYS_BACK days ago
        user_activity_date_raw = episode.attrib.get('userUpdatedDate')
        user_activity_date = parse_dt(user_activity_date_raw)
        if (now - user_activity_date).days > conf.NUMBER_DAYS_BACK:
            continue

        # parse out the remaining details we care about
        title = episode.attrib['title']
        published = parse_dt(episode.attrib['pubDate'])
        url = episode.attrib['url']
        overcast_url = episode.attrib['overcastUrl']
        overcast_id = episode.attrib['overcastId']

        # "YYYY-MM-DD" strings consumed by the Obsidian template
        published_date_str = published.strftime('%Y-%m-%d')
        listen_date_str = user_activity_date.strftime('%Y-%m-%d')

        # Skip already-published episodes BEFORE fetching the episode page,
        # so repeated runs don't re-download every episode's HTML.
        footprint = os.path.join(conf.HISTORY_PATH, overcast_id)
        if os.path.exists(footprint):
            print('Skipping already published ep ->', title)
            continue

        # fetch the episode page once; scrape artwork and summary from it
        response = session.get(overcast_url)
        art_matches = re.findall('img class="art fullart" src="(.*)"', response.text)
        artwork_url = ''
        if len(art_matches) == 1:
            artwork_url = art_matches[0]
        else:
            print('Cannot find podcast artwork for this show... skipping...')

        desc_matches = re.findall('meta name="og:description" content="(.*)"', response.text)
        summary = title  # fall back to the title when no description is found
        if len(desc_matches) == 1 and desc_matches[0]:
            summary = desc_matches[0]

        # TODO: find where to get the episode length from; placeholder for now
        duration = '00:00'

        print('Played episode of ', pod_title)
        print(' ->', title)
        print(' ->', summary)
        print(' ->', published)
        print(' ->', artwork_url)
        print(' ->', url)
        print(' ->', overcast_url)
        print(' ->', duration)
        print(' ->', user_activity_date_raw)

        # build payload shared by the CSV row and the Obsidian note
        episodeData = {
            'overcast_id': overcast_id,
            'title': title,
            'summary': summary,
            'published': published,
            'type': 'podcast',
            'author': pod_title,
            'link': overcast_url,
            'duration': duration,
            'listenDateTime': user_activity_date_raw,
            'artwork_url': artwork_url
        }
        # (removed dead code: requests.get(artwork_url) downloaded the
        #  artwork but its response was never used)

        # Append the episode to the CSV export; write the header row only
        # when the file is created so repeated runs don't duplicate it.
        fieldnames = ['overcast_id', 'title', 'summary', 'published', 'type',
                      'author', 'link', 'duration', 'listenDateTime', 'artwork_url']
        need_header = not os.path.exists(conf.EXPORT_FILENAME)
        with open(conf.EXPORT_FILENAME, 'a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            if need_header:
                writer.writeheader()
            writer.writerow(episodeData)

        # mark the episode as published (close the marker file properly)
        with open(footprint, 'w') as marker:
            marker.write("All good")

        # Create the Obsidian page: read the template, substitute the
        # placeholders, and write the note to the output folder.
        with open(conf.MD_TEMPLATE, 'r') as template_file:
            template_str = template_file.read()

        modified_str = (
            template_str
            .replace('{{Title}}', episodeData['title'])
            .replace('{{Description}}', episodeData['summary'])
            .replace('{{PodcastURL}}', episodeData['link'])
            .replace('{{listenedDate}}', listen_date_str)
            .replace('{{EpisodeDate}}', published_date_str)
            .replace('{{author}}', episodeData['author'])
            .replace('{{ImageURL}}', episodeData['artwork_url'])
        )

        # strip filesystem-hostile characters from the note file name
        clean_title = re.sub(r'\W+', ' ', episodeData['title'])
        clean_author = re.sub(r'\W+', ' ', episodeData['author'])
        output_file_name = os.path.join(
            conf.OUTPUT_FOLDER, '{} - {}.md'.format(clean_title, clean_author))
        with open(output_file_name, 'w') as output_file:
            output_file.write(modified_str)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment