Created
June 12, 2016 08:36
-
-
Save enosh/4b47a60ab66637236de6df05494e5cd3 to your computer and use it in GitHub Desktop.
A script to create a graph of podcast length vs episode number, from an RSS feed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Create a graph of podcast length (in minutes) vs number, from an RSS feed. | |
| Usage: | |
| python(3) <script> <the RSS feed (local file or URL)> <regular expression whose first group captures the episode | |
| number in each title; optional, but useful when the feed doesn't include all the episodes> | |
| Examples: | |
| python3 graph_podcast_duration.py "http://atp.fm/episodes?format=rss" "^(\d{1,4}): (.+)" | |
| python3 graph_podcast_duration.py "https://www.relay.fm/rd/feed" | |
| python3 graph_podcast_duration.py "http://feeds.5by5.tv/ia" | |
| """ | |
| import sys, re, datetime | |
| import feedparser | |
| import matplotlib.pyplot as plt | |
| import matplotlib.ticker as plticker | |
def parse_duration(raw):
    """Convert an ``itunes:duration`` string into a ``datetime.timedelta``.

    Accepts "SS", "MM:SS" and "HH:MM:SS". Fields are combined arithmetically,
    so values such as "75:30" (75 minutes) are valid.

    Raises:
        ValueError: if a field is not an integer or there are more than
            three colon-separated fields.
    """
    fields = [int(part) for part in raw.strip().split(':')]
    if len(fields) > 3:
        raise ValueError("unsupported duration: %r" % (raw,))
    # Left-pad with zeros so the fields always line up as hours, minutes, seconds.
    hours, minutes, seconds = [0] * (3 - len(fields)) + fields
    return datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)


def whole_minutes(duration):
    """Return the length of *duration* in whole minutes.

    Uses ``total_seconds()`` because ``timedelta.seconds`` drops whole days.
    """
    return int(duration.total_seconds() // 60)


def episode_number(title, pattern, position):
    """Return the episode number for an entry.

    Args:
        title: the entry title.
        pattern: regular expression (string or compiled) whose first group
            captures the episode number, or ``None`` when no regex was given.
        position: 1-based position of the entry, used when *pattern* is None.

    Returns:
        The episode number as an int, or ``None`` when *pattern* was given but
        substituting its first group into the title does not leave an integer.
    """
    if pattern is None:
        return position
    try:
        return int(re.sub(pattern, r'\1', title))
    except ValueError:
        return None


def main(argv=None):
    """Plot episode length in minutes against episode number as a bar chart.

    Args:
        argv: argument list in ``sys.argv`` layout (program name, feed, optional
            regex); defaults to ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: python3 <script> <RSS feed path or URL> [episode-number regex]")
    pattern = re.compile(argv[2]) if len(argv) > 2 else None

    d = feedparser.parse(argv[1])
    x, y = [], []
    # The entries are walked in reverse feed order; positions count from 1.
    for position, entry in enumerate(d.entries[::-1], start=1):
        title = getattr(entry, 'title', '')
        try:
            duration = parse_duration(entry.itunes_duration)
        except (AttributeError, ValueError):
            # No duration tag, or one that cannot be parsed: skip rather than abort.
            sys.stderr.write("Skipping %r: no usable duration\n" % (title,))
            continue
        episode = episode_number(title, pattern, position)
        if episode is None:
            sys.stderr.write("Skipping %r: title does not match the regex\n" % (title,))
            continue
        x.append(episode)
        y.append(whole_minutes(duration))

    if not x:
        sys.exit("No episodes with a usable duration were found in the feed.")

    fig, ax = plt.subplots()
    # align="edge" makes each bar span [n, n + 0.5], which is what the x limits
    # below assume; newer Matplotlib versions centre bars by default.
    ax.bar(x, y, 0.5, color="blue", align="edge")
    ax.set_ylabel('Episode length (minutes)')
    ax.set_xlabel('Episode number')
    ax.set_title(d['feed'].get('title', '') + "\n")
    xlabelrotation = 90
    xticker = 5
    for label in ax.xaxis.get_ticklabels():
        label.set_rotation(xlabelrotation)
    # min/max rather than first/last: regex-derived numbers need not be sorted.
    ax.set_xlim(min(x), max(x) + 0.5)
    ax.xaxis.set_major_locator(plticker.MultipleLocator(base=xticker))
    ax.yaxis.set_major_locator(plticker.MultipleLocator(base=30))
    ax.grid(True)
    plt.show()


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Create a graph of podcast length (in hours) vs number, from an RSS feed. | |
| Usage: | |
| python(3) <script> <the RSS feed (local file or URL)> <regular expression whose first group captures the episode | |
| number in each title; optional, but useful when the feed doesn't include all the episodes> | |
| Examples: | |
| python3 graph_podcast_duration.py "http://atp.fm/episodes?format=rss" "^(\d{1,4}): (.+)" | |
| python3 graph_podcast_duration.py "https://www.relay.fm/rd/feed" | |
| python3 graph_podcast_duration.py "http://feeds.5by5.tv/ia" | |
| """ | |
| import sys, re, datetime | |
| import feedparser | |
| import matplotlib.pyplot as plt | |
| import matplotlib.dates as mdates | |
| import matplotlib.ticker as plticker | |
# Arbitrary base date used to place durations on a date axis; it is the same
# date that ``strptime`` assumes when given only a time.
DURATION_BASE = datetime.datetime(1900, 1, 1, 0, 0)


def duration_as_datetime(raw):
    """Convert an ``itunes:duration`` string into a datetime offset from 1900-01-01.

    Accepts "SS", "MM:SS" and "HH:MM:SS". Fields are combined arithmetically
    rather than with ``strptime``, so minute values above 59 (e.g. "75:30")
    and hour values above 23 are accepted.

    Raises:
        ValueError: if a field is not an integer or there are more than
            three colon-separated fields.
    """
    fields = [int(part) for part in raw.strip().split(':')]
    if len(fields) > 3:
        raise ValueError("unsupported duration: %r" % (raw,))
    # Left-pad with zeros so the fields always line up as hours, minutes, seconds.
    hours, minutes, seconds = [0] * (3 - len(fields)) + fields
    return DURATION_BASE + datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds)


def episode_number(title, pattern, position):
    """Return the episode number for an entry.

    Args:
        title: the entry title.
        pattern: regular expression (string or compiled) whose first group
            captures the episode number, or ``None`` when no regex was given.
        position: 1-based position of the entry, used when *pattern* is None.

    Returns:
        The episode number as an int, or ``None`` when *pattern* was given but
        substituting its first group into the title does not leave an integer.
    """
    if pattern is None:
        return position
    try:
        return int(re.sub(pattern, r'\1', title))
    except ValueError:
        return None


def main(argv=None):
    """Plot episode length (HH:MM axis) against episode number as a scatter plot.

    Args:
        argv: argument list in ``sys.argv`` layout (program name, feed, optional
            regex); defaults to ``sys.argv``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit("usage: python3 <script> <RSS feed path or URL> [episode-number regex]")
    pattern = re.compile(argv[2]) if len(argv) > 2 else None

    d = feedparser.parse(argv[1])
    x, y = [], []
    # The entries are walked in reverse feed order; positions count from 1.
    for position, entry in enumerate(d.entries[::-1], start=1):
        title = getattr(entry, 'title', '')
        try:
            duration = duration_as_datetime(entry.itunes_duration)
        except (AttributeError, ValueError):
            # No duration tag, or one that cannot be parsed: skip rather than abort.
            sys.stderr.write("Skipping %r: no usable duration\n" % (title,))
            continue
        episode = episode_number(title, pattern, position)
        if episode is None:
            sys.stderr.write("Skipping %r: title does not match the regex\n" % (title,))
            continue
        x.append(episode)
        y.append(duration)

    if not x:
        sys.exit("No episodes with a usable duration were found in the feed.")

    fig, ax = plt.subplots()
    ax.plot(x, y, "^")
    ax.set_ylabel('Episode length')
    ax.set_xlabel('Episode number')
    ax.set_title(d['feed'].get('title', '') + "\n")
    for label in ax.xaxis.get_ticklabels():
        label.set_rotation(90)
    ax.xaxis.set_major_locator(plticker.MultipleLocator(base=5))
    ax.yaxis.set_major_locator(mdates.MinuteLocator(byminute=range(0, 60, 30)))
    ax.yaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
    ax.yaxis.set_minor_locator(mdates.MinuteLocator(byminute=range(0, 60, 5)))
    # min/max rather than first/last: regex-derived numbers need not be sorted.
    ax.set_xlim(min(x) - 2, max(x) + 2)
    # Leave five minutes of headroom above the longest episode.
    datemax = max(y) + datetime.timedelta(seconds=300)
    ax.set_ylim(DURATION_BASE, datemax)
    ax.format_ydata = mdates.DateFormatter('%H-%M')
    ax.grid(True)
    plt.show()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment