Skip to content

Instantly share code, notes, and snippets.

@bjonord
Last active December 25, 2015 01:09
Show Gist options
  • Save bjonord/6892374 to your computer and use it in GitHub Desktop.
Save bjonord/6892374 to your computer and use it in GitHub Desktop.
Wiki scraper that extracts the geo tag (coordinates) from the page at each URL. Horrible code, first attempt at using Node.js.
China;http://en.wikipedia.org/wiki/China
Philippines;http://en.wikipedia.org/wiki/Philippines
Sweden;http://en.wikipedia.org/wiki/Sweden
Italy;http://en.wikipedia.org/wiki/Italy
Brazil;http://en.wikipedia.org/wiki/Brazil
/*
 * Wikipedia coordinate scraper.
 *
 * Reads "name;url" rows from ./target_urls.txt, asks YQL for the elements
 * matching the ".geo" CSS selector on each URL, and appends one line per
 * successful lookup to coordinates.txt in the form "name-[content]".
 *
 * Lookups run concurrently, so the order of lines in the output file is not
 * guaranteed to match the order of rows in the input file.
 */
var fs = require('fs');
var YQL = require('yql');

// Accept both LF and CRLF line endings so a Windows-edited input file does
// not leave a trailing "\r" on every URL.
var urls = fs.readFileSync('./target_urls.txt', 'utf-8').split(/\r?\n/);
var query = 'SELECT * FROM data.html.cssselect WHERE (url=@url) AND (css=@css)';
var file = 'coordinates.txt';

// Truncate synchronously: an asynchronous truncate could complete after the
// first append and wipe results that were already written.
fs.writeFileSync(file, "");
console.log("Finding the coordinates, wait moment or two.");

/**
 * Appends a single "target-[content]" line to the output file.
 *
 * @param {string} target - Label taken from the first column of the input row.
 * @param {string} content - Text content of the matched ".geo" element.
 */
function appendCoordinates(target, content) {
  fs.appendFile(file, target + '-[' + content + ']\n', function (err) {
    if (err) {
      console.error('Could not write the coordinates for ' + target + ': ' + err.message);
    }
  });
}

urls.forEach(function (row) {
  var line = row.trim();
  if (line === '') {
    return;
  }

  // Split on the first ';' only, so a URL that itself contains ';' stays intact.
  var separatorIndex = line.indexOf(';');
  if (separatorIndex === -1) {
    console.error('Skipping malformed row (expected "name;url"): ' + line);
    return;
  }
  var target = line.slice(0, separatorIndex);
  var url = line.slice(separatorIndex + 1);

  new YQL.exec(query, function (response) {
    console.log("Currently finding the coordinates for " + target);

    // Any level of the response may be missing when the request fails or the
    // selector matches nothing, so walk it defensively instead of throwing.
    var results = response && response.query && response.query.results;
    var spans = results && results.results && results.results.span;
    if (spans == null) {
      console.error('No coordinates found for ' + target);
      return;
    }

    // "span" is either a single object or an array of them; in both cases
    // only the first match is written out.
    var first = Array.isArray(spans) ? spans[0] : spans;
    if (first == null || first.content == null) {
      console.error('No coordinates found for ' + target);
      return;
    }

    appendCoordinates(target, first.content);
  }, {"url": url, "css": ".geo"});
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment