Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save sahidursuman/60b4659cdb46c9cce7381b7a50cb2a3e to your computer and use it in GitHub Desktop.
Save sahidursuman/60b4659cdb46c9cce7381b7a50cb2a3e to your computer and use it in GitHub Desktop.
Google All In Title Scraper
// This is a small program written in Node. I did NOT know anything about Node until coding this.
// The goal was to mimic this guy right here, without using a framework: https://github.com/carlsednaoui/google-allintitle-scraper
// You'll need a file named keywords.txt (one keyword per line) in the working directory.
// It will write the results to a file named results.txt.
var fs = require('fs'),
http = require('http');
// Base search URL; each URL-encoded keyword is appended after the allintitle: operator.
var query = "http://www.google.com/search?q=allintitle:",
    // Captures the first number (digits, optionally with comma separators)
    // found inside the <div id="resultStats"> element of the response.
    searchRegex = /<div id="resultStats">(?:.*?)(\d[\d,]*)(?:.*?)<\/div>/,
    // One keyword per line; tolerate CRLF line endings and drop blank lines so a
    // trailing newline in keywords.txt does not trigger an empty search.
    keywords = fs.readFileSync('keywords.txt', 'utf8')
      .split(/\r?\n/)
      .map(function(line) { return line.trim(); })
      .filter(function(line) { return line !== ''; });

// Fire one request per keyword and append "<keyword>: <count|none>" to results.txt.
// Requests run concurrently, so lines are written in response-arrival order.
keywords.forEach(function(keyword) {
  // Encode the keyword so spaces, '&', '#', '+' etc. stay part of the query value.
  getHttp(query + encodeURIComponent(keyword), function(err, body) {
    if (err) {
      // A failed request is not the same as "no results": report it and skip the line.
      console.error('Request for "' + keyword + '" failed: ' + err.message);
      return;
    }

    var match = searchRegex.exec(body);
    var line = keyword + ': ' + ((match && match[1]) || 'none') + '\n';

    // fs.appendFile requires a callback; use it to surface write failures.
    fs.appendFile('results.txt', line, function(writeErr) {
      if (writeErr) {
        console.error('Could not write result for "' + keyword + '": ' + writeErr.message);
      }
    });
  });
});
/**
 * Fetch a URL with http.request and hand the complete response body to `callback`.
 *
 * @param {string} url - URL to request (passed straight to http.request).
 * @param {function(?Error, string=)} callback - Node-style callback, invoked at most
 *   once: `(null, body)` when the response ends, or `(err)` when the request or the
 *   response stream reports an error.
 */
function getHttp(url, callback) {
  var body = '';
  var settled = false;

  // Ensures the callback fires only once, even if an error is reported after 'end'.
  function finish(err, result) {
    if (settled) { return; }
    settled = true;
    callback(err, result);
  }

  // Log the failure (as before) and also report it to the caller, which
  // previously was never notified and so could not tell the request had failed.
  function fail(e) {
    console.log(e.message);
    finish(e);
  }

  var req = http.request(url, function(res) {
    res.setEncoding('utf8');
    res.on('data', function(chunk) { body += chunk; });
    res.on('end', function() { finish(null, body); });
    res.on('error', fail);
  });

  req.on('error', fail);
  req.end();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment