Skip to content

Instantly share code, notes, and snippets.

@drakeguan
Last active October 14, 2020 22:18
Show Gist options
  • Save drakeguan/49e1590818479e48db7f706ef8cabefa to your computer and use it in GitHub Desktop.
Save drakeguan/49e1590818479e48db7f706ef8cabefa to your computer and use it in GitHub Desktop.
Extract the audio track, convert it to CAF/Opus, and generate an RSS 2.0 feed for a self-hosted podcast!
#!/bin/bash
# Extract the audio track of the video named by $f into audio/<stem>.m4a
# (128 kbit/s AAC), then transcode that file into opus/<stem>.caf
# (Opus in a CAF container, 8000 bit/s).
#
# The caller must set f to the path of the input .mp4 file.
: "${f:?set f to the path of the input .mp4 file}"

# Quote "$f" so that paths containing spaces or glob characters survive,
# and compute the stem only once.
stem="$(basename "$f" .mp4)"

# Neither ffmpeg nor afconvert creates missing output directories.
mkdir -p audio opus

ffmpeg -y -i "$f" -vn -c:a libfdk_aac -b:a 128k "audio/${stem}.m4a"
afconvert -f caff -d opus -b 8000 "audio/${stem}.m4a" "opus/${stem}.caf"
#!/usr/bin/env python
# This script does three things:
# 1. extract the audio track from .mp4 files and convert it into .m4a with 128 kbps AAC.
# 2. convert it to .caf (Core Audio Format) with 8 kbps Opus.
# 3. generate an RSS 2.0 feed for a self-hosted podcast.
# built-in modules
import datetime
import json
import pathlib
from pathlib import Path
import re
import sys
import uuid
# 3rd-party modules
import plumbum
import arrow
import rfeed
from rfeed import *
def get_items(path = None):
    """Convert every matching .mp4 in *path* and return the feed items.

    Files are picked up with ``root.glob('*.mp4')`` and must match the
    pattern ``<...><NNN>.<...>title_<title>_speaker_<author>.<ext>``;
    files that do not match are skipped.  For each match:

    1. the audio track is extracted with ffmpeg into
       ``<cwd>/audio/<stem>.m4a`` (skipped when that file already exists);
    2. the .m4a is converted with afconvert into
       ``<cwd>/podcast/<NNN>.caf`` (skipped when that file already exists);
    3. the duration is read from ffprobe's JSON output and an rfeed
       ``Item`` carrying an ``iTunesItem`` extension is built.

    Args:
        path: Directory to scan for .mp4 files.  ``None`` means ``'.'``.
            Note that the output directories are always relative to the
            current working directory, not to *path*.

    Returns:
        The list of ``Item`` objects, sorted by episode number in
        descending order.
    """
    from plumbum.cmd import afconvert, ffprobe, ffmpeg

    root = Path('.' if path is None else path)
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    prog = re.compile(r'.*(\d{3})\..*title_(.+)_speaker_(.+)\..+$')
    # The original spelled this root.cwd(); cwd() is a classmethod, so the
    # outputs have always been placed under the current working directory.
    audio_dir = Path.cwd() / 'audio'
    podcast_dir = Path.cwd() / 'podcast'

    items = []
    for file_ in root.glob('*.mp4'):
        m = prog.match(str(file_))
        if m is None:
            continue
        number, title, author = m.groups()

        # extract audio channel of 128k into .m4a located in /audio/ folder
        file_m4a = audio_dir / (file_.stem + '.m4a')
        if not file_m4a.exists():
            # ffmpeg does not create missing output directories.
            audio_dir.mkdir(parents=True, exist_ok=True)
            ffmpeg(['-y', '-i', file_, '-vn', '-c:a', 'libfdk_aac', '-b:a', '128k', file_m4a])

        # convert to 8k .caf with opus codec located in /podcast/, with simplified filename
        file_caf = podcast_dir / (number + '.caf')
        if not file_caf.exists():
            podcast_dir.mkdir(parents=True, exist_ok=True)
            afconvert(['-f', 'caff', '-d', 'opus', '-b', '8000', file_m4a, file_caf])

        # get the duration by ffprobe
        stdout = ffprobe(['-v', 'quiet', '-of', 'json', '-hide_banner', '-show_format', '-i', file_caf])
        json_data = json.loads(stdout)
        duration = datetime.timedelta(0, float(json_data['format']['duration']))

        itunes_item = iTunesItem(
            author = author,
            duration = duration,
            explicit = "clean",
            subtitle = title,
            order = int(number),
        )
        item = Item(
            title = number + '. ' + title + ' | ' + author,
            author = author,
            # uuid3 is deterministic, so the same source path always
            # yields the same GUID across runs.
            guid = Guid(uuid.uuid3(uuid.NAMESPACE_DNS, str(file_))),
            pubDate = arrow.Arrow.fromtimestamp(file_.stat().st_mtime),
            # NOTE(review): the enclosure is a .caf file but is advertised
            # as 'audio/aac' -- confirm the intended MIME type.
            enclosure = Enclosure(url="https://oo.xx.asdf/"+file_caf.name, length=file_caf.stat().st_size, type='audio/aac'),
            extensions = [itunes_item])
        items.append(item)

    # Newest (highest-numbered) episode first.
    return sorted(items, key=lambda i: i.extensions[0].order, reverse=True)
def main(argv = None):
    """Build the podcast feed and print it to stdout as RSS 2.0 XML.

    The episodes come from ``get_items()`` called with its default
    arguments; the channel-level metadata is hard-coded below.

    Args:
        argv: Unused.  Kept so existing callers that pass an argument
            list keep working.  The default is ``None`` rather than
            ``sys.argv[:]`` so that nothing is captured at definition time.

    Returns:
        ``0``, suitable for passing to ``sys.exit``.
    """
    items = get_items()
    itunes = iTunes(
        author = 'John Doe',
        subtitle = 'Some title',
        summary = 'Some summary',
        image = 'https://oo.xx.asdf/cover.jpg',
        explicit = 'clean',
        categories = iTunesCategory(name = 'Education', subcategory = 'Higher Education'),
        owner = iTunesOwner(name = 'John Doe', email = '[email protected]'))
    # Read the clock once so pubDate and lastBuildDate are identical.
    now = arrow.now()
    feed = Feed(
        title = 'Some title',
        link = 'https://oo.xx.asdf',
        description = 'Some description',
        generator = 'rfeed - An extensible RSS 2.0 Generator written in Python',
        docs = 'http://blogs.law.harvard.edu/tech/rss',
        language = 'en-US',
        pubDate = now,
        lastBuildDate = now,
        items = items,
        extensions = [itunes])
    print(feed.rss())
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
# refer: https://github.com/egorsmkv/rfeed
import datetime
from rfeed import *
# Minimal rfeed example: one episode (Item + iTunesItem extension) inside
# one channel (Feed + iTunes extension), printed as RSS 2.0 XML.

# Episode-level iTunes metadata.
itunes_item = iTunesItem(
    author = "Santiago L. Valdarrama",
    image = "http://www.example.com/artwork.jpg",
    duration = "01:11:02",
    explicit = "clean",
    subtitle = "The subtitle of the podcast episode",
    summary = "Here is the summary of this specific episode")

# The episode itself.
item = Item(
    title = "Sample article",
    link = "http://www.example.com/articles/1",
    description = "This is the description of the first article",
    author = "Santiago L. Valdarrama",
    guid = Guid("http://www.example.com/articles/1"),
    pubDate = datetime.datetime(2014, 12, 29, 10, 00),
    extensions = [itunes_item])

# Channel-level iTunes metadata.
itunes = iTunes(
    author = "Santiago L. Valdarrama",
    subtitle = "A sample podcast that will never be produced",
    summary = "This is just a fake description",
    image = "http://www.example.com/artwork.jpg",
    explicit = "clean",
    categories = iTunesCategory(name = 'Technology', subcategory = 'Software How-To'),
    owner = iTunesOwner(name = 'Santiago L. Valdarrama', email = '[email protected]'))

# The channel.  The comma after `link` was missing in the original, which
# made the whole script a SyntaxError.
feed = Feed(
    title = "Sample Podcast RSS Feed",
    link = "http://www.example.com/rss",
    description = "An example of how to generate an RSS 2.0 feed",
    language = "en-US",
    lastBuildDate = datetime.datetime.now(),
    items = [item],
    extensions = [itunes])

# print is a function in Python 3; the parenthesised form also runs on Python 2.
print(feed.rss())
#!/bin/bash
# Upload everything in podcast/ to public-html/ on the web host over ssh.
rsync -rav -e ssh podcast/* oo.xx.asdf:public-html/
<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
<channel>
<title>讀書 podcast</title>
<link>https://oo.xx.asdf/</link>
<description>大家一起來讀書</description>
<generator>Feeder 3 3.5.8(3057); Mac OS X Version 10.13.3 (Build 17D47) http://reinventedsoftware.com/feeder/</generator>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<language>en</language>
<pubDate>Mon, 19 Feb 2018 11:22:18 +0800</pubDate>
<lastBuildDate>Mon, 19 Feb 2018 11:22:18 +0800</lastBuildDate>
<itunes:image href="https://oo.xx.asdf/cover.jpg"/>
<itunes:block>no</itunes:block>
<itunes:category text="Education">
<itunes:category text="Higher Education"/>
</itunes:category>
<itunes:type>episodic</itunes:type>
<item>
<title>006.《書本六》</title>
<description><![CDATA[desc blah blah]]></description>
<pubDate>Mon, 19 Feb 2018 11:22:18 +0800</pubDate>
<enclosure url="https://oo.xx.asdf/006.m4a" length="1192902" type="audio/aac"/>
<guid isPermaLink="false">99B07627-1111-2222-3333-DBC6FCEE9372</guid>
<itunes:author>講者三</itunes:author>
<itunes:duration>19:52</itunes:duration>
<itunes:season>0</itunes:season>
<itunes:episode>6</itunes:episode>
<itunes:episodeType>full</itunes:episodeType>
<content:encoded><![CDATA[note blah blah]]></content:encoded>
</item>
</channel>
</rss>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment