Skip to content

Instantly share code, notes, and snippets.

@gildas-lormeau
Last active September 17, 2017 20:03
Show Gist options
  • Save gildas-lormeau/1886f05b5eb6bab399ae880d166262bf to your computer and use it in GitHub Desktop.
Save gildas-lormeau/1886f05b5eb6bab399ae880d166262bf to your computer and use it in GitHub Desktop.
Update Facebook cache with a sitemap file
//
// Update Facebook cache with a sitemap file
// Syntax:
//
// node index <sitemap URL> [delay between Facebook API calls in ms; default 1000, minimum 500]
//
// example:
// > node index https://example.com/sitemap_index.xml
//
"use strict";
const https = require("https");
const http = require("http");
const URL = require("url");
// Start the script. run() expects the command-line arguments in reverse order
// (last argument first); a copy is reversed so process.argv itself stays intact.
run([...process.argv].reverse());

/**
 * Reads the command-line arguments, collects the URLs listed in the sitemap and
 * asks Facebook to re-scrape each of them.
 *
 * @param {string[]} args - process arguments in reverse order, i.e. either
 *   [sitemapURL, ...] or [pauseInMs, sitemapURL, ...].
 * @returns {Promise<void>} settles when the work is finished; failures are
 *   reported through console.error and do not reject the returned promise.
 */
function run(args) {
  const minimumPause = 500;
  let fbRequestPause = 1000;
  let sitemapURL;
  // Command-line arguments are strings, so coerce explicitly before testing.
  const requestedPause = Number(args[0]);
  if (!Number.isNaN(requestedPause)) {
    // A numeric last argument is the pause; never go below the minimum.
    fbRequestPause = Math.max(requestedPause, minimumPause);
    sitemapURL = args[1];
  } else {
    sitemapURL = args[0];
  }
  // The legacy parser reports the protocol with its trailing colon ("https:").
  const protocol = typeof sitemapURL === "string" ? URL.parse(sitemapURL).protocol : null;
  if (!/^https?:$/.test(protocol)) {
    console.error("Error: missing or invalid parameter (sitemap URL)");
    return Promise.resolve();
  }
  return getSitemapURLs(sitemapURL, [])
    .then(urls => updateFacebookCache(urls, fbRequestPause, 0))
    .then(() => console.log("finished!"))
    .catch(console.error);
}
/**
 * Recursively collects the page URLs referenced by a sitemap or sitemap index.
 *
 * @param {string} sitemapURL - URL of a sitemap or sitemap index document.
 * @param {string[]} [urls=[]] - accumulator; URLs found in a sitemap are
 *   inserted at its front.
 * @returns {Promise<string[]>} resolves with the `urls` accumulator.
 */
function getSitemapURLs(sitemapURL, urls = []) {
  return retrieveSitemapContent(sitemapURL).then(sitemapContent => {
    // split() with a capture group alternates text / captured value, so the
    // odd indexes hold the <loc> contents. [\s\S] lets a value span lines.
    const locURLs = sitemapContent
      .split(/<loc>([\s\S]*?)<\/loc>/)
      .filter((value, index) => index % 2)
      .map(decodeSitemapLoc)
      .filter(Boolean);
    // Look for the root element anywhere in the document: it may be preceded
    // by an XML declaration and followed by trailing whitespace.
    if (/<sitemapindex[\s>]/.test(sitemapContent)) {
      return Promise.all(locURLs.map(url => getSitemapURLs(url, urls)))
        .then(() => urls);
    }
    // Prepend the new URLs without passing them all as call arguments, which
    // could exceed the engine's argument limit for very large sitemaps.
    const previousURLs = urls.splice(0, urls.length);
    for (const url of locURLs) {
      urls.push(url);
    }
    for (const url of previousURLs) {
      urls.push(url);
    }
    return urls;
  });
}

// Turns the raw text of a <loc> element into a URL: trims whitespace, unwraps
// a CDATA section and decodes the five predefined XML entities.
function decodeSitemapLoc(rawValue) {
  const text = rawValue.trim();
  const cdata = /^<!\[CDATA\[([\s\S]*?)\]\]>$/.exec(text);
  if (cdata) {
    // CDATA content is literal text; entities must not be decoded in it.
    return cdata[1].trim();
  }
  const entities = { "&lt;": "<", "&gt;": ">", "&quot;": "\"", "&apos;": "'", "&amp;": "&" };
  // Single pass, so "&amp;lt;" becomes "&lt;" and is not decoded twice.
  return text.replace(/&(?:lt|gt|quot|apos|amp);/g, entity => entities[entity]);
}
/**
 * Downloads a sitemap document with a GET request.
 *
 * @param {string} sitemapURL - absolute http: or https: URL of the document.
 * @returns {Promise<string>} resolves with the response body decoded as UTF-8;
 *   rejects on a network error or when the server does not answer with a 2xx
 *   status code.
 */
function retrieveSitemapContent(sitemapURL) {
  return new Promise((resolve, reject) => {
    const url = URL.parse(sitemapURL);
    // The parsed protocol keeps its trailing colon ("https:").
    const transport = url.protocol === "https:" ? https : http;
    const request = transport.request({
      method: "GET",
      hostname: url.hostname,
      // Honour an explicit port; otherwise let the module pick its default.
      port: url.port || undefined,
      // `path` is pathname + query string; `pathname` alone loses the query.
      path: url.path || "/",
    }, response => {
      const statusCode = response.statusCode;
      if (statusCode < 200 || statusCode >= 300) {
        // Drain the body so the socket is released, then report the failure
        // instead of parsing an error page as if it were a sitemap.
        response.resume();
        reject(new Error(`Unable to retrieve ${sitemapURL} (HTTP status ${statusCode})`));
        return;
      }
      let content = "";
      // Decode in the stream so multi-byte characters split across chunks
      // are reassembled correctly.
      response.setEncoding("utf8");
      response.on("data", chunk => {
        content += chunk;
      });
      response.on("end", () => resolve(content));
      response.on("error", reject);
    });
    request.on("error", reject);
    request.end();
  });
}
/**
 * Asks Facebook to re-scrape the URLs one after another, starting at `index`
 * and waiting `fbRequestPause` milliseconds after each request. Processing
 * stops at the end of the list or at the first empty entry.
 *
 * @param {string[]} urls - URLs to refresh.
 * @param {number} fbRequestPause - pause after each request, in milliseconds.
 * @param {number} [index=0] - position of the first URL to process.
 * @returns {Promise<void>} resolves once every remaining URL has been
 *   processed; rejects with the first request failure.
 */
function updateFacebookCache(urls, fbRequestPause, index = 0) {
  const url = urls[index];
  if (!url) {
    return Promise.resolve();
  }
  console.log(">", url);
  // Return the whole chain so the caller's promise settles only after the
  // remaining URLs are done and any later failure propagates to it.
  return updateFacebookCacheURL(url)
    .then(() => new Promise(resolve => setTimeout(resolve, fbRequestPause)))
    .then(() => updateFacebookCache(urls, fbRequestPause, index + 1));
}
/**
 * Sends a form-encoded POST to graph.facebook.com with `id=<url>` and
 * `scrape=true` for a single URL.
 *
 * @param {string} url - URL to submit.
 * @returns {Promise<void>} resolves when the response has been fully
 *   received (a non-2xx status is only logged); rejects on a network error.
 */
function updateFacebookCacheURL(url) {
  return new Promise((resolve, reject) => {
    // encodeURIComponent (unlike encodeURI) also escapes "&", "=", "?", "+"
    // and "#", so a URL with its own query string stays a single field.
    const postData = `id=${encodeURIComponent(url)}&scrape=true`;
    const request = https.request({
      method: "POST",
      hostname: "graph.facebook.com",
      path: "/",
      headers: {
        "Content-Type": "application/x-www-form-urlencoded",
        "Content-Length": Buffer.byteLength(postData),
      },
    }, response => {
      const statusCode = response.statusCode;
      if (statusCode < 200 || statusCode >= 300) {
        // Keep going with the other URLs, but do not hide the refusal.
        console.error(`Warning: Facebook answered with HTTP status ${statusCode} for ${url}`);
      }
      // The body is not used; register a no-op consumer so "end" is emitted.
      response.on("data", () => { });
      response.on("end", resolve);
      response.on("error", reject);
    });
    request.on("error", reject);
    request.write(postData);
    request.end();
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment