Last active
November 29, 2023 18:32
-
-
Save alexose/5f2e933c0d4657de45fae2f607efeb1d to your computer and use it in GitHub Desktop.
Simple YouTube to Podcast RSS (Node.js)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * YouTube to Podcast using Node.js and not much else
 *
 * I got annoyed by all of the clunky and/or paid solutions to this problem, so I
 * thought I'd just roll my own. My guess was that it'd be about 200 lines of code,
 * and I wasn't too far off. It's not perfect but it works and it's relatively easy
 * to follow.
 *
 * No half-hour Docker installs or 20gb Go libraries to download. Just throw it on a
 * free EC2 micro server and serve the files using nginx like the good ol' days. It
 * could probably work as a Lambda, too, without too much effort.
 *
 * Files are cached in the filesystem, so it's cheap to run as a cron job in order to
 * continuously check for updates. If something goes wrong, just delete the /public
 * folder and run the script again.
 *
 * You'll need to `npm install -g youtube-dl-exec xml2js node-fetch` before running.
 * Or set everything up with its own directory and package.json file, idc
 *
 */
// NOTE(review): `https` is not referenced anywhere in the code visible here; kept in case
// another part of the file relies on it.
const https = require("https");
const ytdl = require("youtube-dl-exec");
const fs = require("fs");
const path = require("path");
const xml2js = require("xml2js");
const fetch = require("node-fetch");

// Configuration: replace these placeholders before running.
const playlistId = "YOUR_YOUTUBE_PLAYLIST_ID"; // playlist whose items become feed entries
const apiKey = "YOUR_YOUTUBE_DATA_API_V3_KEY"; // sent as the `key` query parameter
const baseUrl = "https://example.com/podcasts/your_podcast"; // prefix of every enclosure URL

// main() is async, so its promise must not be left floating: report any failure that
// escapes the pipeline and exit non-zero so a cron wrapper can detect it.
main().catch(error => {
    console.error("Fatal error:", error);
    process.exit(1);
});
/**
 * Runs the whole pipeline: fetch the playlist, make sure every entry has audio
 * and metadata on disk, then write the resulting RSS feed to ./public/rss.xml.
 *
 * @returns {Promise<void>} Resolves once the feed file has been written.
 */
async function main() {
    // First, get all playlist entries from the YouTube Data API V3
    const entries = await fetchPlaylistItems(playlistId, apiKey);

    // Next, go through each entry, map all necessary fields, and ensure we have a valid audio file to link to.
    // If we don't, use youtube-dl to grab the audio only, figure out the duration, and save it to a directory.
    const processedEntries = await processEntries(entries);

    // Finally, wrap the full list in a valid RSS feed, convert it to XML, and save to a file.
    const rssFeedXML = convertToRssFeed(processedEntries);

    // ./public is otherwise only created as a side effect of processing an entry, so an
    // empty playlist (or one where nothing was processed) would make the write fail.
    fs.mkdirSync("./public", {recursive: true});
    fs.writeFileSync("./public/rss.xml", rssFeedXML);
    console.log("Done!");
}
/**
 * Fetches every item of a playlist from the YouTube Data API v3, following
 * `nextPageToken` until the API stops returning one.
 *
 * @param {string} playlistId - Playlist to list.
 * @param {string} apiKey - API key sent as the `key` query parameter.
 * @param {string} [nextPageToken=""] - Page token to start from; empty means the first page.
 * @returns {Promise<Array<object>>} All `items` from every page, in page order.
 *   On any fetch/HTTP/parse failure the error is logged and the process exits with code 1.
 */
async function fetchPlaylistItems(playlistId, apiKey, nextPageToken = "") {
    const entries = [];
    let pageToken = nextPageToken;
    try {
        do {
            const url = new URL("https://www.googleapis.com/youtube/v3/playlistItems");
            url.searchParams.set("part", "snippet");
            url.searchParams.set("maxResults", "50");
            url.searchParams.set("playlistId", playlistId);
            url.searchParams.set("key", apiKey);
            // Only send a page token when there is one, rather than an empty `pageToken=`.
            if (pageToken) {
                url.searchParams.set("pageToken", pageToken);
            }

            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(`HTTP error! Status: ${response.status}`);
            }
            const data = await response.json();

            // A response without `items` contributes nothing instead of breaking the result.
            entries.push(...(data.items ?? []));
            pageToken = data.nextPageToken;
        } while (pageToken);
        return entries;
    } catch (error) {
        console.error("Error fetching playlist entries:", error);
        process.exit(1);
    }
}
/**
 * Processes playlist entries one at a time, in order, and collects the ones
 * that produced a result.
 *
 * @param {Array<object>} entries - Raw playlist items.
 * @returns {Promise<Array<object>>} The truthy results of processSingleEntry, in input order.
 */
async function processEntries(entries) {
    const processed = [];
    // Deliberately sequential: each entry is awaited before the next one starts.
    for (const rawEntry of entries) {
        const result = await processSingleEntry(rawEntry);
        if (!result) {
            continue;
        }
        processed.push(result);
    }
    return processed;
}
/**
 * Turns one raw playlist item into a feed item, downloading its audio if it is
 * not already on disk.
 *
 * Audio is stored at ./public/audio/<id>.mp3 and video info at ./public/info/<id>.json.
 *
 * @param {object} unprocessedEntry - Raw playlist item as returned by the API.
 * @returns {Promise<object|null>} The mapped entry with duration and enclosure filled in,
 *   or null when the entry is missing or no info could be obtained for it.
 *   A failed download (or a download that leaves no file behind) logs the error and
 *   exits the process with code 1.
 */
async function processSingleEntry(unprocessedEntry) {
    if (!unprocessedEntry) return null;

    const entry = mapEntry(unprocessedEntry);
    const id = entry.guid;
    const audioDir = "./public/audio";
    const audioPath = path.join(audioDir, `${id}.mp3`);
    const infoDir = "./public/info";
    const infoPath = path.join(infoDir, `${id}.json`);
    createDirIfMissing(audioDir);
    createDirIfMissing(infoDir);

    // See if cached info exists
    const info = await fetchInfo(id, infoPath);
    if (info === null) {
        // If we can't get info, just bail out since it's probably a deleted video.
        // Return null (not undefined) so both bail-outs look the same to callers.
        return null;
    }
    entry["itunes:duration"] = formatDuration(info.duration);

    if (!fs.existsSync(audioPath)) {
        console.log(`No audio for ${entry.title} (${audioPath}). Downloading ${info.duration} seconds now...`);
        try {
            // Download audio
            await ytdl(`https://www.youtube.com/watch?v=${id}`, {
                extractAudio: true,
                audioFormat: "mp3",
                output: audioPath,
            });
        } catch (e) {
            console.error(`Failed to download audio for ${entry.title}:`, e);
            process.exit(1);
        }
    } else {
        console.log(`Found audio for ${entry.title} (${audioPath}).`);
    }

    // Check audio size. The downloader can report success without producing the expected
    // file, so fail with an explicit message instead of a bare ENOENT from statSync.
    let stats;
    try {
        stats = fs.statSync(audioPath);
    } catch (e) {
        console.error(`Audio file missing after download for ${entry.title} (${audioPath}):`, e);
        process.exit(1);
    }

    // NOTE(review): the file lives under ./public/audio/ but the URL has no /audio/ segment;
    // confirm the web server maps baseUrl onto the audio directory.
    entry.enclosure.$.url = `${baseUrl}/${id}.mp3`;
    entry.enclosure.$.length = stats.size;

    // All done
    return entry;
}
/**
 * Returns the info object for a video, using the JSON file at `infoPath` as a cache.
 *
 * When no cache file exists the info is requested through ytdl and the result is
 * written to `infoPath`. A failed request is cached as the JSON literal `null`, so
 * the video is not requested again on later runs.
 *
 * @param {string} id - YouTube video id.
 * @param {string} infoPath - Path of the cache file for this video.
 * @returns {Promise<object|null>} The info object, or null when it could not be fetched
 *   (now or on a previous run).
 */
async function fetchInfo(id, infoPath) {
    if (fs.existsSync(infoPath)) {
        return JSON.parse(fs.readFileSync(infoPath, "utf8"));
    }

    // Declared locally: the previous bare assignment created an implicit global and
    // throws a ReferenceError in strict mode.
    let info;
    try {
        info = (await ytdl(`https://www.youtube.com/watch?v=${id}`, {dumpSingleJson: true})) ?? null;
    } catch (e) {
        // Handle deleted video, but say so instead of swallowing the error silently.
        console.warn(`Could not fetch info for ${id}; caching it as unavailable:`, e);
        info = null;
    }

    // Written outside the try block so a failed cache write is reported as such rather
    // than being mistaken for a deleted video.
    fs.writeFileSync(infoPath, JSON.stringify(info, null, 2));
    return info;
}
/**
 * Maps a raw playlist item onto the object shape used for an RSS <item>.
 *
 * The enclosure URL/length and the duration are placeholders here; the caller
 * fills them in once the audio file is available.
 *
 * @param {object} entry - Playlist item; only `entry.snippet` is read.
 * @returns {object} Feed item keyed by RSS/iTunes element names. `itunes:image` is
 *   omitted when the snippet offers no thumbnail.
 */
function mapEntry(entry) {
    const {snippet} = entry;
    const {videoId} = snippet.resourceId;

    // Extract the highest quality thumbnail available, largest first. A snippet without
    // thumbnails is tolerated rather than throwing.
    const thumbnails = snippet.thumbnails ?? {};
    const thumbnailUrl =
        thumbnails.maxres?.url ||
        thumbnails.standard?.url ||
        thumbnails.high?.url ||
        thumbnails.medium?.url ||
        thumbnails.default?.url;

    // Key order is kept stable because it determines element order in the generated XML.
    return {
        title: snippet.title,
        "itunes:subtitle": snippet.title.substring(0, 100), // Short summary, adjust as needed
        description: snippet.description,
        "itunes:summary": snippet.description,
        pubDate: new Date(snippet.publishedAt).toUTCString(),
        link: `https://www.youtube.com/watch?v=${videoId}`,
        guid: videoId,
        "itunes:author": snippet.channelTitle,
        enclosure: {
            $: {
                type: "audio/mpeg",
                url: null, // Will be added later
                length: "0", // Will be added later
            },
        },
        // Skip the image element entirely instead of emitting an undefined href.
        ...(thumbnailUrl ? {"itunes:image": {$: {href: thumbnailUrl}}} : {}),
        "itunes:explicit": "no",
        "itunes:category": {
            $: {
                text: "Science",
            },
        },
        "itunes:keywords": "Carbon Capture, Climate, Environment",
        "itunes:duration": "00:00:00", // Will be added later
    };
}
/**
 * Wraps the processed entries in an RSS 2.0 document (with the iTunes namespace
 * declared) and serializes it with xml2js.
 *
 * The channel-level values are placeholders to be edited by hand.
 *
 * @param {Array<object>} entries - Feed items, used as the channel's `item` list.
 * @returns {string} The document produced by xml2js.Builder#buildObject.
 */
function convertToRssFeed(entries) {
    // Channel metadata followed by the items themselves.
    const channel = {
        title: "YOUR TITLE",
        link: "LINK TO YOUTUBE PLAYLIST",
        description: "DESCRIPTION",
        language: "en-us",
        "itunes:author": "AUTHOR",
        "itunes:image": {$: {href: "IMAGE URL"}},
        "itunes:owner": {
            "itunes:name": "OWNER NAME",
            "itunes:email": "OWNER EMAIL",
        },
        "itunes:explicit": "no",
        "itunes:category": {$: {text: "Science"}},
        item: entries,
    };

    // Attributes of the root <rss> element.
    const rootAttributes = {
        version: "2.0",
        "xmlns:itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd",
    };

    const builder = new xml2js.Builder();
    return builder.buildObject({rss: {$: rootAttributes, channel}});
}
// Helper functions

/**
 * Formats a duration in seconds as HH:MM:SS.
 *
 * Fractional seconds are truncated to whole seconds; negative, missing or
 * non-numeric input is treated as 0 so the result is always well-formed.
 * Hours are not capped, so very long durations use more than two hour digits.
 *
 * @param {number|string} seconds - Duration in seconds.
 * @returns {string} Zero-padded "HH:MM:SS".
 */
function formatDuration(seconds) {
    const pad = num => num.toString().padStart(2, "0");

    const numeric = Number(seconds);
    const total = Number.isFinite(numeric) ? Math.max(0, Math.floor(numeric)) : 0;

    const hours = pad(Math.floor(total / 3600));
    const minutes = pad(Math.floor((total % 3600) / 60));
    const secondsLeft = pad(total % 60);
    return `${hours}:${minutes}:${secondsLeft}`;
}
/**
 * Ensures `dir` exists as a directory, creating missing parent directories too.
 *
 * With `recursive: true` mkdirSync succeeds when the directory already exists, so
 * no separate existence check is needed. A path that exists but is not a directory
 * now fails here, instead of being accepted and breaking later writes.
 *
 * @param {string} dir - Directory path to create.
 * @returns {void}
 */
function createDirIfMissing(dir) {
    fs.mkdirSync(dir, {recursive: true});
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment