Skip to content

Instantly share code, notes, and snippets.

@RolandWarburton
Last active July 22, 2020 08:51
Show Gist options
  • Save RolandWarburton/1a8e1096023b8b45ab2db3f32e6e6dff to your computer and use it in GitHub Desktop.
Save RolandWarburton/1a8e1096023b8b45ab2db3f32e6e6dff to your computer and use it in GitHub Desktop.
youtube Link Extractor from a file of js links
// __ __ __ __ ___ ____ ____
// \ \/ /___ __ __/ /___ __/ /_ ___ / | / __ \/ _/
// \ / __ \/ / / / __/ / / / __ \/ _ \ / /| | / /_/ // /
// / / /_/ / /_/ / /_/ /_/ / /_/ / __/ / ___ |/ ____// /
// /_/\____/\__,_/\__/\__,_/_.___/\___/ /_/ |_/_/ /___/
// Extract {title, channelId, publishedAt} from an array of youtube links
// Make sure to use an API key below
//
// A good way i found of getting lists of youtube urls is to use Link Gopher.
// Though the results do require some minimal filtering to get only youtube watch URLs
// https://addons.mozilla.org/en-US/firefox/addon/link-gopher/
// ? Example input data:
//
// [
// "https://www.youtube.com/watch?v=yuRaiarXYuM&list=LLFPStYdObZlOzzjVLJntQyQ&index=3353&t=0s",
// "https://www.youtube.com/watch?v=yuRaiarXYuM",
// "https://www.youtube.com/watch?v=hffu2JNcYV0",
// ]
// ? Example return data:
//
// {
// "title": "King Gizzard & The Lizard Wizard - Quarters! (Full Album)",
// "channelId": "UCE0Y8aON3Gt6SkzFpA9uDkg",
// "publishedAt": "2015-07-08T02:30:10Z"
// },
// {
// "title": "You'll be back Animatic",
// "channelId": "UC99Dg2_ZKyGRaVlfgugQOUw",
// "publishedAt": "2016-10-18T20:18:15Z"
// },
// {
// "title": "Angra - No Pain For The Dead",
// "channelId": "UCYNG2ZhvDdniIHRT0Cuo3XQ",
// "publishedAt": "2008-10-14T06:07:21Z"
// },
const fs = require("fs");
const path = require("path");
const { google } = require("googleapis");
const { parse } = require("node-html-parser");
// ! API KEY HERE
const key = "YOUR API KEY HERE";
/**
* Returns all the links from a .html file.
* This step can be skipped if you already have an array of links that you can "read in" in main
* @example
* getLinks("./links.html")
*/
const getLinks = (filepath) => {
// read the urls in and parse them into a DOM node structure
const videos = parse(fs.readFileSync(filepath, "utf8"));
// grab all the urls from the html file
const urls = videos.querySelectorAll("a");
// individual hrefs go here
const links = [];
for (const domURL of urls) {
const link = domURL.rawAttributes.href;
if (link) links.push(domURL.rawAttributes.href);
}
return links;
};
/**
* auth with youtube v3 api πŸ”’
* Make sure you define the API key at the top of this file
*/
const youtube = google.youtube({
version: "v3",
auth: key,
});
// TODO find a way to use Oauth to get a users private playlist to make this way more efficient.
/**
* Query Youtube api for the video ID. πŸ”
* Returns a promise that resolves to an array where
* the id=id so its super expensive and probably shouldnt be done like this
* @param {String} id - ID of the videa
* @example
* search("hffu2JNcYV0")
*/
const search = async (id) => {
return youtube.videos
.list({
part: "id,snippet",
id: id,
})
.then((res) => {
if (res.data) {
return {
title: res.data.items[0].snippet.title,
channelId: res.data.items[0].snippet.channelId,
publishedAt: res.data.items[0].snippet.publishedAt,
};
}
})
.catch((err) => {
console.log(err);
});
};
/**
* Super jank way of extracting the video ID from a URL
* @param {String} link
*/
const stripID = (link) => {
return link.substring(32, 43);
};
/**
* Return a new array of results that have all null values removed
* @param {JSON} data
*/
const filterNullValues = (data) => {
// return all elems where elem exists
return data.filter((elem) => !!elem);
};
// Run! πŸš€
const main = async () => {
const links = getLinks();
const stats = [];
for (const link of links) {
stats.push(search(stripID(link)));
}
const data = await Promise.all(stats);
// write the data!
fs.writeFileSync("./output.json", JSON.stringify(data));
};
module.exports = main();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment