-
-
Save yayadrian/67ea73d5e6635d8a41561f4608053539 to your computer and use it in GitHub Desktop.
Fetch recently played episodes from Overcast.fm, then create a CSV and generate notes in Obsidian from a template.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# conf.py — user-editable settings for the Overcast -> Obsidian exporter.

SESSION_PATH = "./sessions"         # pickled requests.Session is stored at this path
EMAIL = "CHANGE-ME"                 # Overcast.fm account email
PASSWORD = "CHANGE-ME"              # Overcast.fm account password
HISTORY_PATH = "./history"          # per-episode marker files are created in this folder
ENDPOINT_URL = "https://api.example.com/endpoint"
EXPORT_FILENAME = "./playedEpisodes.csv"  # CSV export of the played episodes
MD_TEMPLATE = 'CHANGE-ME'           # path to the Obsidian markdown template
OUTPUT_FOLDER = 'CHANGE-ME'         # path to the Obsidian output folder
NUMBER_DAYS_BACK = 5                # how many days of play history to include
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
https://gist.github.com/cleverdevil/a8215850420493c1ee06364161e281c0 | |
You'll need to pip install some dependencies: | |
* python-dateutil | |
* requests | |
Also, populate your EMAIL and PASSWORD below. | |
''' | |
from xml.etree import ElementTree | |
from datetime import datetime | |
from dateutil.tz import UTC | |
from dateutil.parser import parse as parse_dt | |
import conf | |
import re | |
import sys | |
import requests | |
import pickle | |
import os.path | |
import json | |
import csv | |
# Restore a previously pickled requests.Session, or authenticate against
# Overcast.fm and persist the new session for later runs.
if os.path.exists(conf.SESSION_PATH):
    print('Found saved session. Restoring!')
    # NOTE: unpickling is only safe because this file was written by us;
    # never point SESSION_PATH at untrusted data.
    with open(conf.SESSION_PATH, 'rb') as saved_session:
        session = pickle.load(saved_session)
else:
    print('No saved session. Authenticating!')
    session = requests.Session()
    response = session.post('https://overcast.fm/login', data={
        'email': conf.EMAIL,
        'password': conf.PASSWORD
    })
    if response.status_code != 200:
        print('Authentication failed')
        sys.exit(1)  # non-zero exit so cron/shell callers see the failure
    print('Authenticated successfully. Saving session.')
    with open(conf.SESSION_PATH, 'wb') as saved_session:
        saved_session.write(pickle.dumps(session))
# Fetch the latest detailed OPML export from Overcast and parse it.
print('Fetching latest OPML export from Overcast')
response = session.get('https://overcast.fm/account/export_opml/extended')
if response.status_code != 200:
    print('Failed to fetch OPML. Exiting.')
    print(response.text)
    print(response.headers)
    sys.exit(1)  # non-zero exit so failures are visible to the caller

# Cache the last OPML file for debugging; a cache miss is not fatal.
try:
    with open('/tmp/overcast.opml', 'w') as f:
        f.write(response.text)
except OSError:  # narrow except: only file-system errors are expected here
    print('Unable to cache OPML file.')

# parse the OPML into an element tree
tree = ElementTree.fromstring(response.text)
# find all podcasts and their episodes in the OPML tree
podcasts = tree.findall(".//*[@type='rss']")

# Look for recently played episodes. Use an aware "now": the original
# datetime.utcnow().astimezone(UTC) is a bug — utcnow() is naive, so
# astimezone() interprets it as LOCAL time and skews the window by the
# machine's UTC offset.
now = datetime.now(UTC)

for podcast in podcasts:
    pod_title = podcast.attrib['title']
    for episode in list(podcast):
        # skip unplayed episodes
        if episode.attrib.get('played', '0') != '1':
            continue

        # skip episodes played more than NUMBER_DAYS_BACK days ago
        user_activity_date_raw = episode.attrib.get('userUpdatedDate')
        user_activity_date = parse_dt(user_activity_date_raw)
        if (now - user_activity_date).days > conf.NUMBER_DAYS_BACK:
            continue

        # parse out the remaining details we care about
        title = episode.attrib['title']
        published = parse_dt(episode.attrib['pubDate'])
        url = episode.attrib['url']
        overcast_url = episode.attrib['overcastUrl']
        overcast_id = episode.attrib['overcastId']

        # "YYYY-MM-DD" strings consumed by the Obsidian template
        published_date_str = published.strftime('%Y-%m-%d')
        listen_date_str = user_activity_date.strftime('%Y-%m-%d')

        # Skip already-published episodes BEFORE fetching the episode page,
        # so repeated runs don't re-download every episode's HTML.
        footprint = os.path.join(conf.HISTORY_PATH, overcast_id)
        if os.path.exists(footprint):
            print('Skipping already published ep ->', title)
            continue

        # fetch the episode page once; scrape artwork and summary from it
        response = session.get(overcast_url)
        art_matches = re.findall('img class="art fullart" src="(.*)"', response.text)
        artwork_url = ''
        if len(art_matches) == 1:
            artwork_url = art_matches[0]
        else:
            print('Cannot find podcast artwork for this show... skipping...')

        desc_matches = re.findall('meta name="og:description" content="(.*)"', response.text)
        summary = title  # fall back to the title when no description is found
        if len(desc_matches) == 1 and desc_matches[0]:
            summary = desc_matches[0]

        # TODO: find where to get the episode length from; placeholder for now
        duration = '00:00'

        print('Played episode of ', pod_title)
        print(' ->', title)
        print(' ->', summary)
        print(' ->', published)
        print(' ->', artwork_url)
        print(' ->', url)
        print(' ->', overcast_url)
        print(' ->', duration)
        print(' ->', user_activity_date_raw)

        # build payload shared by the CSV row and the Obsidian note
        episodeData = {
            'overcast_id': overcast_id,
            'title': title,
            'summary': summary,
            'published': published,
            'type': 'podcast',
            'author': pod_title,
            'link': overcast_url,
            'duration': duration,
            'listenDateTime': user_activity_date_raw,
            'artwork_url': artwork_url
        }
        # (removed dead code: requests.get(artwork_url) downloaded the
        #  artwork but its response was never used)

        # Append the episode to the CSV export; write the header row only
        # when the file is created so repeated runs don't duplicate it.
        fieldnames = ['overcast_id', 'title', 'summary', 'published', 'type',
                      'author', 'link', 'duration', 'listenDateTime', 'artwork_url']
        need_header = not os.path.exists(conf.EXPORT_FILENAME)
        with open(conf.EXPORT_FILENAME, 'a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            if need_header:
                writer.writeheader()
            writer.writerow(episodeData)

        # mark the episode as published (close the marker file properly)
        with open(footprint, 'w') as marker:
            marker.write("All good")

        # Create the Obsidian page: read the template, substitute the
        # placeholders, and write the note to the output folder.
        with open(conf.MD_TEMPLATE, 'r') as template_file:
            template_str = template_file.read()

        modified_str = (
            template_str
            .replace('{{Title}}', episodeData['title'])
            .replace('{{Description}}', episodeData['summary'])
            .replace('{{PodcastURL}}', episodeData['link'])
            .replace('{{listenedDate}}', listen_date_str)
            .replace('{{EpisodeDate}}', published_date_str)
            .replace('{{author}}', episodeData['author'])
            .replace('{{ImageURL}}', episodeData['artwork_url'])
        )

        # strip filesystem-hostile characters from the note file name
        clean_title = re.sub(r'\W+', ' ', episodeData['title'])
        clean_author = re.sub(r'\W+', ' ', episodeData['author'])
        output_file_name = os.path.join(
            conf.OUTPUT_FOLDER, '{} - {}.md'.format(clean_title, clean_author))
        with open(output_file_name, 'w') as output_file:
            output_file.write(modified_str)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment