Skip to content

Instantly share code, notes, and snippets.

@LinZap
Created January 11, 2016 08:38
Show Gist options
  • Save LinZap/a2be37935baad04b428e to your computer and use it in GitHub Desktop.
Save LinZap/a2be37935baad04b428e to your computer and use it in GitHub Desktop.
wikipedia_crawler for NodeJS
var request = require("request"),
fs = require("fs");
// Settings for the HTTP request: the target is the zh.wikipedia.org API
// (`action=parse`, `pageid=13`, JSON output), sent with browser-like headers.
// NOTE(review): the Cookie value embeds `WMF-Last-Access=11-Jan-2016`, so it
// looks like a captured browser session — confirm whether it is still needed.
var options = {
  url: "https://zh.wikipedia.org/w/api.php?format=json&action=parse&pageid=13",
  headers: {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36",
    "Cookie": "TBLkisOn=0; CP=H2; WMF-Last-Access=11-Jan-2016; GeoIP=:::::v6",
    "Accept-Language": "zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4"
  }
};
/**
 * Completion handler for the Wikipedia API request: validates the response
 * and stores it on disk.
 *
 * When the response has status 200 and the body is valid JSON, the body is
 * parsed, re-serialized compactly and written synchronously to `data.json`
 * (relative to the current working directory). Any failure before the write
 * is reported through `console.error` and nothing is written.
 *
 * @param {*} error - Falsy on success; otherwise the failure reported by the caller.
 * @param {Object} [response] - Response object; only `statusCode` is read.
 * @param {string} [body] - Raw response body, expected to be JSON text.
 * @returns {void}
 * @throws {Error} Whatever `fs.writeFileSync` throws if the file cannot be written.
 */
function callback(error, response, body) {
  if (error) {
    console.error('Request failed:', error);
    return;
  }

  // Guard against a missing response as well as any non-OK status.
  if (!response || response.statusCode !== 200) {
    console.error('Unexpected HTTP status:', response && response.statusCode);
    return;
  }

  // Round-tripping through JSON.parse validates the payload and normalizes
  // its formatting; a non-JSON body (e.g. an HTML error page) must not crash
  // the script with an uncaught SyntaxError.
  var serialized;
  try {
    serialized = JSON.stringify(JSON.parse(body));
  } catch (parseError) {
    console.error('Response body is not valid JSON:', parseError.message);
    return;
  }

  fs.writeFileSync('data.json', serialized, 'utf8');
}
// Start the HTTP request described by `options`, passing `callback` as the
// handler that receives (error, response, body).
request(options, callback);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment