Last active
October 16, 2019 05:01
-
-
Save heyjoeway/236f04bf2cfa8c0f8c12f35d016fbc5f to your computer and use it in GitHub Desktop.
SocialBlade Top Channels Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Paste this code into the developer console while on this page:
// https://socialblade.com/youtube/top/5000/mostsubscribed
// In Chrome, open the Request Blocking tool, Enable Request Blocking, and add the following URL:
// ring.socialblade.com/ding.json
// SocialBlade seems to auto-refresh, so let's kill all timeouts to try and prevent that.
// Schedule a no-op timeout purely to learn the most recently issued timer ID,
// then walk downwards clearing every lower ID. This relies on the browser
// handing out timer IDs as increasing positive integers. Because of the
// post-decrement, the no-op timeout itself is not cleared; it simply fires
// and does nothing.
let id = window.setTimeout(function() {}, 0);
while (id--) {
    window.clearTimeout(id); // will do nothing if no timeout with id is present
}
// We'll be waiting in between requests to not look as much like a spambot.
/**
 * Returns a promise that settles after a delay, so callers can pause with
 * `await sleep(ms)`.
 *
 * @param {number} milliseconds - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
const sleep = (milliseconds) => {
    return new Promise((resolve) => setTimeout(resolve, milliseconds));
};
// Filter all the SB urls out of the page. | |
// Collect the href attribute of every <a> under the parent of #sort-by.
// Anchors without an href yield undefined here.
let allHrefUrls = $("#sort-by")
    .parent()
    .find("a")
    .toArray()
    .map((anchor) => $(anchor).attr("href"));

// Keep only hrefs that are strings beginning with "/youtube/"; this drops the
// undefined entries as well as links pointing elsewhere.
let filteredHrefUrls = allHrefUrls.filter((href) => {
    return typeof href === "string" && href.startsWith("/youtube/");
});
// Now we're gonna actually get the channel ids. | |
// Scraped channel IDs, written by index (channelIds[i] pairs with
// filteredHrefUrls[i]). Slots whose request has not produced a result are left
// as holes, which JSON.stringify serialises as null.
let channelIds = [];
// Periodically download data, just in case. | |
// Remember to allow automatic downloads for this site! | |
// https://ourcodeworld.com/articles/read/189/how-to-create-a-file-and-generate-a-download-with-javascript-in-the-browser-without-a-server | |
/**
 * Triggers a browser download of `text` as a plain-text file by clicking a
 * temporary, hidden <a download> element whose href is a data: URI.
 *
 * @param {string} filename - Suggested name for the downloaded file.
 * @param {string} text - File contents; URI-encoded into the data: URI.
 */
function download(filename, text) {
    const element = document.createElement('a');
    element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text));
    element.setAttribute('download', filename);
    element.style.display = 'none';
    document.body.appendChild(element);
    try {
        element.click();
    } finally {
        // Always detach the helper element, even if click() throws, so
        // repeated backups never leave stray anchors in the page.
        document.body.removeChild(element);
    }
}
/**
 * Downloads the channel IDs gathered so far as "backup.json".
 * The current contents of `channelIds` are serialised with JSON.stringify at
 * call time, so each call captures a fresh snapshot.
 */
function downloadChannelIds() {
    download(
        "backup.json",
        JSON.stringify(channelIds)
    );
}
// Save a backup of the collected IDs every 60 seconds. The handle is kept so
// the periodic backup can be cancelled with clearInterval once scraping ends.
let downloadInterval = setInterval(() => downloadChannelIds(), 60 * 1000);
// If we're aiming to get all 5000 top channels, it will take: | |
// 5000 * 0.5s = 2500s = ~42 min | |
// Main scrape loop: starts one request per SocialBlade channel URL, spaced
// 500 ms apart, and stores each extracted channel ID in channelIds at the
// index of its URL. Requests are not awaited inside the loop (so the pacing
// stays at one request per 500 ms), but all of them are awaited before the
// final download so late responses are not lost.
(async () => {
    // Number of leading characters stripped from the YouTube link's href to
    // leave the channel ID.
    // NOTE(review): presumably the length of "https://youtube.com/channel/"
    // (28 characters) — confirm against the current SocialBlade markup.
    const channelUrlPrefixLength = 28;

    // Adapts the callback-style $.ajax call to a promise that resolves with
    // the page HTML and rejects with an Error describing the failure.
    const fetchPage = (url) => new Promise((resolve, reject) => {
        $.ajax({
            url: url,
            type: "GET",
            dataType: "html",
            success: resolve,
            error: (jqXHR, textStatus, errorThrown) => {
                reject(new Error("Request for " + url + " failed: " + textStatus + " " + errorThrown));
            }
        });
    });

    // Fetches one channel page and records its channel ID. Never rejects:
    // failures are logged and leave the corresponding slot empty, so one bad
    // page cannot abort the whole run.
    const scrapeChannelId = async (sbUrl, index) => {
        try {
            const data = await fetchPage(sbUrl);
            const href = $(data)
                .find(".fa-youtube-play")
                .parent("a")
                .attr("href");
            if (typeof href !== "string") {
                console.warn("No YouTube link found on " + sbUrl);
                return;
            }
            channelIds[index] = href.slice(channelUrlPrefixLength);
        } catch (err) {
            console.error("Failed to scrape " + sbUrl, err);
        }
    };

    const pendingRequests = [];
    try {
        for (let i = 0; i < filteredHrefUrls.length; i++) {
            const sbUrl = filteredHrefUrls[i];
            console.log("Current SB URL: " + sbUrl);
            console.log("Current index: " + i + "/" + (filteredHrefUrls.length - 1));
            pendingRequests.push(scrapeChannelId(sbUrl, i));
            await sleep(500);
        }
        // Wait for any responses still in flight before taking the final snapshot.
        await Promise.all(pendingRequests);
    } finally {
        // Stop the periodic backups and save whatever was collected, even if
        // the loop ended with an error.
        clearInterval(downloadInterval);
        downloadChannelIds();
    }
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment