Skip to content

Instantly share code, notes, and snippets.

@enosh
Created June 12, 2016 08:36
Show Gist options
  • Select an option

  • Save enosh/4b47a60ab66637236de6df05494e5cd3 to your computer and use it in GitHub Desktop.

Select an option

Save enosh/4b47a60ab66637236de6df05494e5cd3 to your computer and use it in GitHub Desktop.
A script to create a graph of podcast length vs episode number, from an RSS feed.
"""
Create a graph of podcast length (in minutes) vs number, from an RSS feed.
Usage:
python(3) <script> <the RSS feed (local or a URL)> <regex whose first capture group is the episode number; optional but
useful when the feed doesn't include all the episodes>
Examples:
python3 graph_podcast_duration.py "http://atp.fm/episodes?format=rss" "^(\d{1,4}): (.+)"
python3 graph_podcast_duration.py "https://www.relay.fm/rd/feed"
python3 graph_podcast_duration.py "http://feeds.5by5.tv/ia"
"""
import sys, re, datetime
import feedparser
import matplotlib.pyplot as plt
import matplotlib.ticker as plticker
X_LABEL_ROTATION = 90  # degrees; episode numbers are drawn vertically
X_TICK_STEP = 5        # one x tick every 5 episodes
Y_TICK_STEP = 30       # one y tick every 30 minutes
BAR_WIDTH = 0.5


def parse_duration(raw):
    """Convert an ``itunes:duration`` value into a ``datetime.timedelta``.

    Args:
        raw: Duration written as ``SS``, ``MM:SS`` or ``HH:MM:SS``.

    Returns:
        The equivalent ``datetime.timedelta``.

    Raises:
        ValueError: If a field is not an integer or there are more than
            three colon-separated fields.
    """
    fields = [int(part) for part in str(raw).strip().split(':')]
    if len(fields) > 3:
        raise ValueError('unsupported duration format: %r' % (raw,))
    # Left-pad with zeros so the fields always read hours, minutes, seconds.
    hours, minutes, seconds = [0] * (3 - len(fields)) + fields
    return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)


def episode_number(title, pattern, fallback):
    """Return the episode number embedded in *title*, or *fallback*.

    Args:
        title: Episode title taken from the feed.
        pattern: Regex (string or compiled) whose first capture group is the
            episode number, or ``None`` when no regex was supplied.
        fallback: Number to use when no number can be extracted.

    Returns:
        The episode number as an ``int``.
    """
    if pattern is None:
        return fallback
    try:
        # Substituting the match with group 1 leaves only the number when
        # the pattern covers the whole title.
        return int(re.sub(pattern, r'\1', title))
    except ValueError:
        # The title did not match (or text was left over), e.g. an
        # unnumbered bonus episode: fall back to the positional number.
        return fallback


def main():
    """Plot episode length (minutes) against episode number as a bar chart.

    Reads the feed location from ``sys.argv[1]`` and an optional
    episode-number regex from ``sys.argv[2]``.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: python3 graph_podcast_duration.py '
                 '<RSS feed path or URL> [<episode-number regex>]')
    pattern = re.compile(sys.argv[2]) if len(sys.argv) > 2 else None

    d = feedparser.parse(sys.argv[1])
    x, y = [], []
    # Entries are walked in reverse feed order (presumably oldest first), so
    # the 1-based position can stand in for a missing episode number.
    for position, entry in enumerate(reversed(d.entries), start=1):
        try:
            duration = parse_duration(entry.itunes_duration)
        except (AttributeError, ValueError):
            sys.stderr.write('Skipping %r: missing or malformed duration\n'
                             % (entry.get('title', position),))
            continue
        x.append(episode_number(entry.get('title', ''), pattern, position))
        # total_seconds() does not wrap at 24 hours the way .seconds does.
        y.append(int(duration.total_seconds() // 60))

    if not x:
        sys.exit('No episodes with a usable duration were found in the feed.')

    fig, ax = plt.subplots()
    # align='edge' makes every bar span [n, n + BAR_WIDTH], which is what the
    # x-limits below assume; Matplotlib >= 2.0 centres bars by default.
    ax.bar(x, y, BAR_WIDTH, color="blue", align='edge')
    ax.set_ylabel('Episode length (minutes)')
    ax.set_xlabel('Episode number')
    ax.set_title(d['feed'].get('title', '') + "\n")
    for label in ax.xaxis.get_ticklabels():
        label.set_rotation(X_LABEL_ROTATION)
    ax.set_xlim(min(x), max(x) + BAR_WIDTH)
    ax.xaxis.set_major_locator(plticker.MultipleLocator(base=X_TICK_STEP))
    ax.yaxis.set_major_locator(plticker.MultipleLocator(base=Y_TICK_STEP))
    ax.grid(True)
    plt.show()


if __name__ == '__main__':
    main()
"""
Create a graph of podcast length (in hours) vs number, from an RSS feed.
Usage:
python(3) <script> <the RSS feed (local or a URL)> <regex whose first capture group is the episode number; optional but
useful when the feed doesn't include all the episodes>
Examples:
python3 graph_podcast_duration.py "http://atp.fm/episodes?format=rss" "^(\d{1,4}): (.+)"
python3 graph_podcast_duration.py "https://www.relay.fm/rd/feed"
python3 graph_podcast_duration.py "http://feeds.5by5.tv/ia"
"""
import sys, re, datetime
import feedparser
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as plticker
# Durations are plotted as clock times on this arbitrary date so that
# Matplotlib's date locators/formatters can label the y axis as HH:MM.
BASE_DATE = datetime.datetime(1900, 1, 1)


def duration_as_datetime(raw):
    """Convert an ``itunes:duration`` value into a datetime on ``BASE_DATE``.

    Args:
        raw: Duration written as ``SS``, ``MM:SS`` or ``HH:MM:SS``. Fields
            are not range-limited, so ``75:30`` is accepted.

    Returns:
        ``BASE_DATE`` plus the duration. Durations of 24 hours or more land
        on a later calendar day.

    Raises:
        ValueError: If a field is not an integer or there are more than
            three colon-separated fields.
    """
    fields = [int(part) for part in str(raw).strip().split(':')]
    if len(fields) > 3:
        raise ValueError('unsupported duration format: %r' % (raw,))
    # Left-pad with zeros so the fields always read hours, minutes, seconds.
    hours, minutes, seconds = [0] * (3 - len(fields)) + fields
    return BASE_DATE + datetime.timedelta(
        hours=hours, minutes=minutes, seconds=seconds)


def episode_number(title, pattern, fallback):
    """Return the episode number embedded in *title*, or *fallback*.

    Args:
        title: Episode title taken from the feed.
        pattern: Regex (string or compiled) whose first capture group is the
            episode number, or ``None`` when no regex was supplied.
        fallback: Number to use when no number can be extracted.

    Returns:
        The episode number as an ``int``.
    """
    if pattern is None:
        return fallback
    try:
        # Substituting the match with group 1 leaves only the number when
        # the pattern covers the whole title.
        return int(re.sub(pattern, r'\1', title))
    except ValueError:
        # The title did not match (or text was left over), e.g. an
        # unnumbered bonus episode: fall back to the positional number.
        return fallback


def main():
    """Plot episode length (h:mm) against episode number as a scatter plot.

    Reads the feed location from ``sys.argv[1]`` and an optional
    episode-number regex from ``sys.argv[2]``.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: python3 graph_podcast_duration.py '
                 '<RSS feed path or URL> [<episode-number regex>]')
    pattern = re.compile(sys.argv[2]) if len(sys.argv) > 2 else None

    d = feedparser.parse(sys.argv[1])
    x, y = [], []
    # Entries are walked in reverse feed order (presumably oldest first), so
    # the 1-based position can stand in for a missing episode number.
    for position, entry in enumerate(reversed(d.entries), start=1):
        try:
            duration = duration_as_datetime(entry.itunes_duration)
        except (AttributeError, ValueError):
            sys.stderr.write('Skipping %r: missing or malformed duration\n'
                             % (entry.get('title', position),))
            continue
        x.append(episode_number(entry.get('title', ''), pattern, position))
        y.append(duration)

    if not x:
        sys.exit('No episodes with a usable duration were found in the feed.')

    fig, ax = plt.subplots()
    ax.plot(x, y, "^")
    ax.set_ylabel('Episode length')
    ax.set_xlabel('Episode number')
    ax.set_title(d['feed'].get('title', '') + "\n")
    for label in ax.xaxis.get_ticklabels():
        label.set_rotation(90)
    ax.xaxis.set_major_locator(plticker.MultipleLocator(base=5))
    # Major y ticks every 30 minutes, minor ticks every 5 minutes.
    ax.yaxis.set_major_locator(mdates.MinuteLocator(byminute=range(0, 60, 30)))
    ax.yaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.yaxis.set_minor_locator(mdates.MinuteLocator(byminute=range(0, 60, 5)))
    # Two episodes of padding on each side of the plotted range.
    ax.set_xlim(min(x) - 2, max(x) + 2)
    # The y axis runs from 0:00 to five minutes past the longest episode.
    ax.set_ylim(BASE_DATE, max(y) + datetime.timedelta(seconds=300))
    ax.format_ydata = mdates.DateFormatter('%H-%M')
    ax.grid(True)
    plt.show()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment