Last active
October 14, 2020 22:18
-
-
Save drakeguan/49e1590818479e48db7f706ef8cabefa to your computer and use it in GitHub Desktop.
Extract the audio track, convert it to CAF/Opus, and generate an RSS2 feed for a personally hosted podcast!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Convert one video file into podcast audio.
# Expects the path of the source .mp4 in the variable "f"; this snippet does not set it itself.
# Writes audio/<name>.m4a (AAC, 128 kbit/s) and then opus/<name>.caf (Opus, 8000 bit/s).

# Strip the directory and the .mp4 suffix once. Quoting "$f" keeps file names
# containing spaces or glob characters from being word-split.
name="$(basename "$f" .mp4)"

ffmpeg -y -i "$f" -vn -c:a libfdk_aac -b:a 128k "audio/${name}.m4a"
afconvert -f caff -d opus -b 8000 "audio/${name}.m4a" "opus/${name}.caf"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# This script does three things:
# 1. extract the audio track from .mp4 files and convert it into .m4a (AAC, 128 kbit/s).
# 2. convert that into .caf (Core Audio Format) with the Opus codec at 8 kbit/s.
# 3. generate an RSS2 feed for a self-hosted podcast.
# built-in modules | |
import datetime | |
import json | |
import pathlib | |
from pathlib import Path | |
import re | |
import sys | |
import uuid | |
# 3rd-party modules | |
import plumbum | |
import arrow | |
import rfeed | |
from rfeed import * | |
def get_items(path = None):
    """Build the podcast episode items for every matching ``.mp4`` in *path*.

    A file is recognised by its name: three digits followed by a dot, then
    ``title_<title>_speaker_<speaker>`` and an extension. Each matching video
    is transcoded, only when the target does not exist yet, to
    ``audio/<stem>.m4a`` with ffmpeg and then to ``podcast/<number>.caf`` with
    afconvert. Both folders are resolved against the current working
    directory, not against *path*. The duration is read back from the
    ``.caf`` file with ffprobe.

    Args:
        path: Directory to scan (non-recursively) for ``*.mp4`` files.
            ``None`` means the current directory.

    Returns:
        A list of ``Item`` objects sorted by episode number, highest first.
        The list is empty when no file name matches the pattern.
    """
    root = Path('.' if path is None else path)
    # Raw string: '\d' and '\.' are invalid escape sequences in a plain literal.
    prog = re.compile(r'.*(\d{3})\..*title_(.+)_speaker_(.+)\..+$')

    episodes = []
    for file_ in root.glob('*.mp4'):
        # Match the bare file name so directory components of *path* cannot
        # take part in the match.
        m = prog.match(file_.name)
        if m is not None:
            episodes.append((file_, m.group(1), m.group(2), m.group(3)))
    if not episodes:
        # Nothing to convert: return before the external tools are needed.
        return []

    # plumbum looks these executables up when they are imported, so import
    # them only once there is work to do.
    from plumbum.cmd import afconvert, ffprobe, ffmpeg

    # Output folders live under the current working directory; create them
    # so the converters do not fail on a missing directory.
    audio_dir = Path.cwd().joinpath('audio')
    podcast_dir = Path.cwd().joinpath('podcast')
    audio_dir.mkdir(parents=True, exist_ok=True)
    podcast_dir.mkdir(parents=True, exist_ok=True)

    items = []
    for file_, number, title, author in episodes:
        # extract the audio track at 128k into .m4a located in the audio/ folder
        file_m4a = audio_dir.joinpath(file_.stem + '.m4a')
        if not file_m4a.exists():
            ffmpeg(['-y', '-i', file_, '-vn', '-c:a', 'libfdk_aac', '-b:a', '128k', file_m4a])

        # convert to 8k .caf with the opus codec in podcast/, with a simplified filename
        file_caf = podcast_dir.joinpath(number + '.caf')
        if not file_caf.exists():
            afconvert(['-f', 'caff', '-d', 'opus', '-b', '8000', file_m4a, file_caf])

        # get the duration (seconds, as reported by ffprobe's JSON output)
        stdout = ffprobe(['-v', 'quiet', '-of', 'json', '-hide_banner', '-show_format', '-i', file_caf])
        json_data = json.loads(stdout)
        duration = datetime.timedelta(0, float(json_data['format']['duration']))

        itunes_item = iTunesItem(
            author = author,
            duration = duration,
            explicit = "clean",
            subtitle = title,
            order = int(number),
        )
        item = Item(
            title = number + '. ' + title + ' | ' + author,
            author = author,
            # The GUID is a name-based UUID derived from the source path, not a
            # URL, so it is passed as text and flagged as not being a permalink.
            guid = Guid(str(uuid.uuid3(uuid.NAMESPACE_DNS, str(file_))), isPermaLink = False),
            pubDate = arrow.Arrow.fromtimestamp(file_.stat().st_mtime),
            # NOTE(review): the enclosure is a .caf file but is advertised as
            # 'audio/aac' -- confirm which MIME type the podcast clients expect.
            enclosure = Enclosure(url="https://oo.xx.asdf/"+file_caf.name, length=file_caf.stat().st_size, type='audio/aac'),
            extensions = [itunes_item])
        items.append(item)

    # Newest episode (highest number) first.
    return sorted(items, key=lambda i: i.extensions[0].order, reverse=True)
def main(argv = sys.argv[:]):
    """Print the podcast RSS feed to standard output.

    Collects the episode items with ``get_items()``, wraps them in a channel
    with iTunes metadata and prints the serialized feed.

    Args:
        argv: Accepted for the usual entry-point signature; not read here.

    Returns:
        0, used by the caller as the process exit status.
    """
    episodes = get_items()

    # Channel-level iTunes metadata.
    category = iTunesCategory(name = 'Education', subcategory = 'Higher Education')
    owner = iTunesOwner(name = 'John Doe', email = '[email protected]')
    channel_itunes = iTunes(
        author = 'John Doe',
        subtitle = 'Some title',
        summary = 'Some summary',
        image = 'https://oo.xx.asdf/cover.jpg',
        explicit = 'clean',
        categories = category,
        owner = owner,
    )

    # The RSS channel itself; both dates are taken at generation time.
    channel = Feed(
        title = 'Some title',
        link = 'https://oo.xx.asdf',
        description = 'Some description',
        generator = 'rfeed - An extensible RSS 2.0 Generator written in Python',
        docs = 'http://blogs.law.harvard.edu/tech/rss',
        language = 'en-US',
        pubDate = arrow.now(),
        lastBuildDate = arrow.now(),
        items = episodes,
        extensions = [channel_itunes],
    )

    rss_xml = channel.rss()
    print(rss_xml)
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# refer: https://github.com/egorsmkv/rfeed | |
import datetime | |
from rfeed import * | |
# Minimal rfeed example: one episode item with iTunes metadata, wrapped in a
# channel that also carries iTunes metadata, printed as RSS 2.0 XML.

# Per-episode iTunes extension.
itunes_item = iTunesItem(
    author = "Santiago L. Valdarrama",
    image = "http://www.example.com/artwork.jpg",
    duration = "01:11:02",
    explicit = "clean",
    subtitle = "The subtitle of the podcast episode",
    summary = "Here is the summary of this specific episode")

# The episode itself.
item = Item(
    title = "Sample article",
    link = "http://www.example.com/articles/1",
    description = "This is the description of the first article",
    author = "Santiago L. Valdarrama",
    guid = Guid("http://www.example.com/articles/1"),
    pubDate = datetime.datetime(2014, 12, 29, 10, 0),
    extensions = [itunes_item])

# Channel-level iTunes extension.
itunes = iTunes(
    author = "Santiago L. Valdarrama",
    subtitle = "A sample podcast that will never be produced",
    summary = "This is just a fake description",
    image = "http://www.example.com/artwork.jpg",
    explicit = "clean",
    categories = iTunesCategory(name = 'Technology', subcategory = 'Software How-To'),
    owner = iTunesOwner(name = 'Santiago L. Valdarrama', email = '[email protected]'))

# The channel. The comma after ``link`` was missing in the original, which
# made the whole call a syntax error.
feed = Feed(
    title = "Sample Podcast RSS Feed",
    link = "http://www.example.com/rss",
    description = "An example of how to generate an RSS 2.0 feed",
    language = "en-US",
    lastBuildDate = datetime.datetime.now(),
    items = [item],
    extensions = [itunes])

# print() as a function call works on Python 3 (and, with one argument, on Python 2).
print(feed.rss())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Upload the contents of podcast/ to public-html/ on the remote host over ssh.
rsync --recursive --archive --verbose --rsh=ssh podcast/* oo.xx.asdf:public-html/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="utf-8"?> | |
<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0"> | |
<channel> | |
<title>讀書 podcast</title> | |
<link>https://oo.xx.asdf/</link> | |
<description>大家一起來讀書</description> | |
<generator>Feeder 3 3.5.8(3057); Mac OS X Version 10.13.3 (Build 17D47) http://reinventedsoftware.com/feeder/</generator> | |
<docs>http://blogs.law.harvard.edu/tech/rss</docs> | |
<language>en</language> | |
<pubDate>Mon, 19 Feb 2018 11:22:18 +0800</pubDate> | |
<lastBuildDate>Mon, 19 Feb 2018 11:22:18 +0800</lastBuildDate> | |
<itunes:image href="https://oo.xx.asdf/cover.jpg"/> | |
<itunes:block>no</itunes:block> | |
<itunes:category text="Education"> | |
<itunes:category text="Higher Education"/> | |
</itunes:category> | |
<itunes:type>episodic</itunes:type> | |
<item> | |
<title>006.《書本六》</title> | |
<description><![CDATA[desc blah blah]]></description> | |
<pubDate>Mon, 19 Feb 2018 11:22:18 +0800</pubDate> | |
<enclosure url="https://oo.xx.asdf/006.m4a" length="1192902" type="audio/aac"/> | |
<guid isPermaLink="false">99B07627-1111-2222-3333-DBC6FCEE9372</guid> | |
<itunes:author>講者三</itunes:author> | |
<itunes:duration>19:52</itunes:duration> | |
<itunes:season>0</itunes:season> | |
<itunes:episode>6</itunes:episode> | |
<itunes:episodeType>full</itunes:episodeType> | |
<content:encoded><![CDATA[note blah blah]]></content:encoded> | |
</item> | |
</channel> | |
</rss> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment