Last active
July 22, 2020 08:51
-
-
Save RolandWarburton/1a8e1096023b8b45ab2db3f32e6e6dff to your computer and use it in GitHub Desktop.
youtube Link Extractor from a file of js links
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// __ __ __ __ ___ ____ ____ | |
// \ \/ /___ __ __/ /___ __/ /_ ___ / | / __ \/ _/ | |
// \ / __ \/ / / / __/ / / / __ \/ _ \ / /| | / /_/ // / | |
// / / /_/ / /_/ / /_/ /_/ / /_/ / __/ / ___ |/ ____// / | |
// /_/\____/\__,_/\__/\__,_/_.___/\___/ /_/ |_/_/ /___/ | |
// Extract {title, channelId, publishedAt} from an array of YouTube links.
// Make sure to use an API key below.
//
// A good way I found of getting lists of YouTube URLs is to use Link Gopher,
// though the results do require some minimal filtering to get only YouTube watch URLs.
// https://addons.mozilla.org/en-US/firefox/addon/link-gopher/
// ? Example input data: | |
// | |
// [ | |
// "https://www.youtube.com/watch?v=yuRaiarXYuM&list=LLFPStYdObZlOzzjVLJntQyQ&index=3353&t=0s", | |
// "https://www.youtube.com/watch?v=yuRaiarXYuM", | |
// "https://www.youtube.com/watch?v=hffu2JNcYV0", | |
// ] | |
// ? Example return data: | |
// | |
// { | |
// "title": "King Gizzard & The Lizard Wizard - Quarters! (Full Album)", | |
// "channelId": "UCE0Y8aON3Gt6SkzFpA9uDkg", | |
// "publishedAt": "2015-07-08T02:30:10Z" | |
// }, | |
// { | |
// "title": "You'll be back Animatic", | |
// "channelId": "UC99Dg2_ZKyGRaVlfgugQOUw", | |
// "publishedAt": "2016-10-18T20:18:15Z" | |
// }, | |
// { | |
// "title": "Angra - No Pain For The Dead", | |
// "channelId": "UCYNG2ZhvDdniIHRT0Cuo3XQ", | |
// "publishedAt": "2008-10-14T06:07:21Z" | |
// }, | |
const fs = require("fs");
const path = require("path");
const { google } = require("googleapis");
const { parse } = require("node-html-parser");
// ! API KEY HERE
// Set the YOUTUBE_API_KEY environment variable so the key never has to be
// committed, or replace the placeholder string below.
// `||` (not `??`) is deliberate: an empty environment variable also falls back.
const key = process.env.YOUTUBE_API_KEY || "YOUR API KEY HERE";
/**
 * Collect the href of every <a> element in an HTML file.
 * This step can be skipped if you already have an array of links that you
 * can "read in" in main.
 *
 * The file is read synchronously as UTF-8 and parsed with node-html-parser.
 * Values come from `rawAttributes`, i.e. as the parser exposes them without
 * further processing (presumably HTML entities are left undecoded; verify
 * against the node-html-parser version in use).
 *
 * @param {String} filepath - Path of the HTML file to read
 * @returns {String[]} The href values in document order; anchors with a
 *   missing or empty href are skipped
 * @example
 * getLinks("./links.html")
 */
const getLinks = (filepath) => {
  // read the file and parse it into a DOM node structure
  const root = parse(fs.readFileSync(filepath, "utf8"));

  const links = [];
  for (const anchor of root.querySelectorAll("a")) {
    const { href } = anchor.rawAttributes;
    // anchors without a usable href (e.g. <a name="...">) are ignored
    if (href) links.push(href);
  }
  return links;
};
/**
 * YouTube Data API v3 client, authenticated with the API key.
 * Make sure you define the API key at the top of this file.
 */
const youtube = google.youtube({
  version: "v3",
  auth: key,
});
// TODO: find a way to use OAuth to get a user's private playlist to make this way more efficient.
/**
 * Query the YouTube API for a single video ID.
 * Every call issues its own `videos.list` request, so looking up a long
 * list of videos this way is expensive and probably shouldn't be done like this.
 *
 * This function never rejects: request errors are logged and swallowed so
 * that one bad video does not abort a whole batch of lookups.
 *
 * @param {String} id - ID of the video
 * @returns {Promise<{title: String, channelId: String, publishedAt: String}|undefined>}
 *   The selected snippet fields of the first returned item, or `undefined`
 *   when the request fails or the response contains no items
 * @example
 * search("hffu2JNcYV0")
 */
const search = async (id) => {
  try {
    const res = await youtube.videos.list({
      part: "id,snippet",
      id,
    });

    // An empty item list is a normal "no such video" answer, not a crash:
    // report it clearly instead of tripping over items[0].
    const items = res.data && res.data.items;
    if (!items || items.length === 0) {
      console.log(`No video found for id "${id}"`);
      return undefined;
    }

    const { title, channelId, publishedAt } = items[0].snippet;
    return { title, channelId, publishedAt };
  } catch (err) {
    console.log(err);
    return undefined;
  }
};
/**
 * Extract the video ID from a YouTube URL.
 *
 * Recognised forms:
 *  - watch URLs with a `v` query parameter, in any position and on any host
 *    or scheme (e.g. https://www.youtube.com/watch?list=...&v=<id>)
 *  - short links: https://youtu.be/<id>
 *  - path based links: /embed/<id>, /shorts/<id>, /v/<id>
 *
 * Anything else falls back to the historical behaviour of returning
 * characters 32-43 of the string, which is where the ID sits in a plain
 * "https://www.youtube.com/watch?v=<id>" link.
 *
 * @param {String} link - URL of the video
 * @returns {String} The video ID (or the legacy fixed-offset slice)
 */
const stripID = (link) => {
  let url;
  try {
    url = new URL(link);
  } catch (err) {
    // Not an absolute URL, so there is nothing to parse.
    return link.substring(32, 43);
  }

  const queryId = url.searchParams.get("v");
  if (queryId) return queryId;

  const segments = url.pathname.split("/").filter(Boolean);
  if (url.hostname === "youtu.be" && segments.length > 0) {
    return segments[0];
  }
  if (segments.length > 1 && ["embed", "shorts", "v"].includes(segments[0])) {
    return segments[1];
  }

  return link.substring(32, 43);
};
/**
 * Return a new array with every falsy entry removed (null, undefined, and
 * also 0, "", false and NaN). The input array is not modified.
 * Intended for dropping the empty results of failed lookups.
 * @param {Array} data - Array of results, possibly containing empty slots
 * @returns {Array} A new array holding only the truthy entries, in order
 */
const filterNullValues = (data) => {
  // keep every element that exists
  return data.filter(Boolean);
};
/**
 * Run! Read the links from an HTML file, look every video up and write the
 * results to disk as JSON.
 *
 * All lookups are started at once and awaited together. search() logs its
 * own failures and resolves without a value, so failed lookups show up as
 * empty entries; these are removed before writing.
 *
 * @param {String} [filepath="./links.html"] - HTML file to read the links from
 * @param {String} [outputPath="./output.json"] - File the JSON array is written to
 * @returns {Promise<Object[]>} The data that was written
 */
const main = async (filepath = "./links.html", outputPath = "./output.json") => {
  const links = getLinks(filepath);

  const lookups = links.map((link) => search(stripID(link)));
  const results = await Promise.all(lookups);

  // drop the lookups that produced nothing
  const data = filterNullValues(results);

  // write the data!
  fs.writeFileSync(outputPath, JSON.stringify(data));
  return data;
};
// Start the run on load and export its promise so callers can await it.
module.exports = main();
// Report a failed run instead of leaving an unhandled rejection. The handler
// is attached separately so the exported promise itself still rejects for
// anyone awaiting it.
module.exports.catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment