Last active
September 17, 2017 20:03
-
-
Save gildas-lormeau/1886f05b5eb6bab399ae880d166262bf to your computer and use it in GitHub Desktop.
Update Facebook cache with a sitemap file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// Update the Facebook cache for every URL listed in a sitemap file.
// Syntax:
//
//   node index <sitemap URL> [delay between Facebook API calls in ms]
//
// Example:
//   > node index https://example.com/sitemap_index.xml
//
"use strict"; | |
const https = require("https"); | |
const http = require("http"); | |
const URL = require("url"); | |
// The arguments are handed over last-first so that run() finds the optional
// delay (or the sitemap URL) at index 0. A copy is reversed because
// Array.prototype.reverse() works in place and would otherwise leave the
// global process.argv reversed for the rest of the process.
run(process.argv.slice().reverse());
/**
 * Entry point: interprets the command-line arguments, collects the URLs of the
 * sitemap and asks Facebook to refresh its cache for each of them.
 *
 * @param {string[]} args - Command-line arguments in REVERSE order (args[0] is
 *   the last argument typed). When args[0] is a finite number it is the pause
 *   between two Facebook API calls in milliseconds (raised to at least 500) and
 *   args[1] is the sitemap URL; otherwise args[0] is the sitemap URL and the
 *   pause defaults to 1000 ms.
 * @returns {void} Progress and errors are reported on the console.
 */
function run(args) {
    let fbRequestPause = 1000;
    let sitemapURL;
    const requestedPause = Number(args[0]);
    if (Number.isFinite(requestedPause)) {
        fbRequestPause = Math.max(requestedPause, 500);
        sitemapURL = args[1];
    } else {
        sitemapURL = args[0];
    }
    // Only parse real strings: a missing argument must end in the usage error
    // below, not in a TypeError thrown by the URL parser.
    const parsedURL = typeof sitemapURL === "string" ? URL.parse(sitemapURL) : null;
    const protocol = parsedURL ? parsedURL.protocol : null;
    // The parsed protocol keeps its trailing colon; compare it exactly so that
    // look-alikes such as "xhttp:" are not accepted.
    if (protocol === "http:" || protocol === "https:") {
        getSitemapURLs(sitemapURL, [])
            .then(urls => updateFacebookCache(urls, fbRequestPause, 0))
            .then(() => console.log("finished!"))
            .catch(console.error);
    } else {
        console.error("Error: missing or invalid parameter (sitemap URL)");
    }
}
/**
 * Turns the raw text found between <loc> and </loc> into a usable URL.
 *
 * @param {string} rawLoc - Text content of one <loc> element.
 * @returns {string} The URL with surrounding whitespace removed, the CDATA
 *   wrapper stripped or the predefined XML entities decoded.
 */
function decodeSitemapLoc(rawLoc) {
    const text = rawLoc.trim();
    const cdata = /^<!\[CDATA\[([\s\S]*?)\]\]>$/.exec(text);
    if (cdata) {
        // CDATA content is literal: entities inside it must not be decoded.
        return cdata[1].trim();
    }
    const xmlEntities = { amp: "&", lt: "<", gt: ">", quot: "\"", apos: "'" };
    // Single pass, so "&amp;lt;" becomes "&lt;" and is not decoded twice.
    return text.replace(/&(amp|lt|gt|quot|apos);/g, (match, name) => xmlEntities[name]);
}

/**
 * Collects the page URLs of a sitemap, following sitemap index files
 * recursively.
 *
 * @param {string} sitemapURL - URL of a sitemap or of a sitemap index.
 * @param {string[]} [urls] - Accumulator shared by the recursive calls; the
 *   URLs found are inserted at its beginning.
 * @returns {Promise<string[]>} Resolves with `urls` once every referenced
 *   sitemap has been read; rejects if any of them cannot be retrieved.
 */
function getSitemapURLs(sitemapURL, urls = []) {
    return retrieveSitemapContent(sitemapURL)
        .then(sitemapContent => {
            // split() with one capturing group puts the captured <loc> values
            // at the odd indexes. [\s\S] lets a value span several lines.
            const locURLs = sitemapContent
                .split(/<loc>([\s\S]*?)<\/loc>/)
                .filter((value, index) => index % 2)
                .map(decodeSitemapLoc)
                .filter(Boolean);
            // Not anchored to the start/end of the document: an index usually
            // begins with an XML declaration and ends with a newline.
            if (/<sitemapindex[\s>]/.test(sitemapContent)) {
                return Promise.all(locURLs.map(url => getSitemapURLs(url, urls)))
                    .then(() => urls);
            }
            urls.unshift(...locURLs);
            return urls;
        });
}
/**
 * Downloads a sitemap and returns its content as text.
 *
 * @param {string} sitemapURL - Absolute http: or https: URL of the sitemap.
 * @returns {Promise<string>} Resolves with the response body decoded as
 *   UTF-8. Rejects when the URL is not an http(s) URL, when the request or
 *   the response fails, or when the status code is not 2xx (redirects are
 *   not followed).
 */
function retrieveSitemapContent(sitemapURL) {
    return new Promise((resolve, reject) => {
        const url = URL.parse(sitemapURL);
        // The parsed protocol includes the trailing colon ("https:").
        if (!url || (url.protocol !== "http:" && url.protocol !== "https:")) {
            reject(new Error("Unsupported sitemap URL: " + sitemapURL));
            return;
        }
        const transport = url.protocol === "https:" ? https : http;
        const options = {
            method: "GET",
            hostname: url.hostname,
            // Keep the query string: some sitemaps are served as "/sitemap?page=2".
            path: (url.pathname || "/") + (url.search || ""),
        };
        if (url.port) {
            options.port = url.port;
        }
        const request = transport.request(options, response => {
            const statusCode = response.statusCode;
            if (statusCode < 200 || statusCode >= 300) {
                // Discard the body so the socket is released, then report the
                // failure instead of parsing an error page as a sitemap.
                response.resume();
                reject(new Error("Unexpected HTTP status " + statusCode + " for " + sitemapURL));
                return;
            }
            let content = "";
            // Decode in the stream so a multi-byte character split across two
            // chunks is not corrupted by the string concatenation.
            response.setEncoding("utf8");
            response.on("data", data => content += data);
            response.on("end", () => resolve(content));
            response.on("error", reject);
        });
        request.on("error", reject);
        request.end();
    });
}
/**
 * Asks Facebook to refresh its cache for each URL of the list, one after the
 * other, waiting `fbRequestPause` milliseconds after every request.
 *
 * @param {string[]} urls - URLs to refresh.
 * @param {number} fbRequestPause - Pause after each request, in milliseconds.
 * @param {number} [index] - Position of the first URL to process.
 * @returns {Promise<void>} Resolves once the whole list has been processed;
 *   rejects as soon as one request fails.
 */
function updateFacebookCache(urls, fbRequestPause, index = 0) {
    // Empty entries are skipped instead of being mistaken for the end of the list.
    let position = index;
    while (position < urls.length && !urls[position]) {
        position++;
    }
    if (position >= urls.length) {
        return Promise.resolve();
    }
    const url = urls[position];
    console.log(">", url);
    // Each step is chained on the previous one so that the returned promise
    // settles only after the last URL, and a failure at any position rejects it.
    return new Promise(resolve => resolve(updateFacebookCacheURL(url)))
        .then(() => new Promise(resolve => setTimeout(resolve, fbRequestPause)))
        .then(() => updateFacebookCache(urls, fbRequestPause, position + 1));
}
/**
 * Sends one scrape request for `url` to the Facebook Graph API
 * (POST https://graph.facebook.com/ with the form fields `id` and `scrape`).
 *
 * @param {string} url - Page URL whose cached data should be refreshed.
 * @returns {Promise<void>} Resolves when the response has been fully received,
 *   whatever its status code (the response body is discarded); rejects on a
 *   request or response error.
 */
function updateFacebookCacheURL(url) {
    return new Promise((resolve, reject) => {
        // The URL is the VALUE of a form field, so reserved characters such as
        // "&", "=", "+" and "#" must be escaped too; encodeURI() leaves them
        // as is and a page URL with a query string would be cut at its first "&".
        const postData = "id=" + encodeURIComponent(url) + "&scrape=true";
        const request = https.request({
            method: "POST",
            hostname: "graph.facebook.com",
            path: "/",
            headers: {
                "Content-Type": "application/x-www-form-urlencoded",
                "Content-Length": Buffer.byteLength(postData)
            }
        }, response => {
            // The body is not used, but it has to be consumed for "end" to fire.
            response.on("data", () => { });
            response.on("end", resolve);
            response.on("error", reject);
        });
        request.on("error", reject);
        request.write(postData);
        request.end();
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment