Last active
December 25, 2015 01:09
-
-
Save bjonord/6892374 to your computer and use it in GitHub Desktop.
Wikipedia scraper that extracts the geo-tag (coordinates) from the page at each URL.
Horrible code, first attempt at using Node.js.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
China;http://en.wikipedia.org/wiki/China
Philippines;http://en.wikipedia.org/wiki/Philippines
Sweden;http://en.wikipedia.org/wiki/Sweden
Italy;http://en.wikipedia.org/wiki/Italy
Brazil;http://en.wikipedia.org/wiki/Brazil
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Reads "<name>;<url>" rows from ./target_urls.txt, queries each URL through
// YQL for elements matching the ".geo" CSS selector, and appends one
// "<name>-[<content>]" line per successful lookup to coordinates.txt.
var fs = require('fs');
var YQL = require('yql');

var urls = fs.readFileSync('./target_urls.txt', 'utf-8').split('\n');
var query = 'SELECT * FROM data.html.cssselect WHERE (url=@url) AND (css=@css)';
var file = 'coordinates.txt';

// Truncate synchronously: the output file must be empty before any query
// callback appends to it, and the async form requires a callback.
fs.writeFileSync(file, "");
console.log("Finding the coordinates, wait moment or two.");

urls.forEach(function (row) {
  // trim() also removes the "\r" left behind when the input uses CRLF endings.
  var line = row.trim();
  if (line === "") {
    return;
  }

  var split_row = line.split(';');
  var target = split_row[0];
  var url = split_row[1];
  if (!url) {
    console.error("Skipping malformed row (expected 'name;url'): " + line);
    return;
  }

  // `new` is kept from the original call form; the callback receives the raw
  // YQL response object.
  new YQL.exec(query, function (response) {
    console.log("Currently finding the coordinates for " + target);

    // NOTE(review): assumes a failed query is reported via `response.error`
    // and/or a missing `response.query` — confirm against the yql package.
    if (!response || response.error || !response.query) {
      console.error("Query failed for " + target, response && response.error);
      return;
    }

    var results = response.query.results;
    var spans = results && results.results && results.results.span;
    if (spans == null) {
      // No ".geo" element was found on the page.
      return;
    }

    // `span` is either a single object or an array of matches; only the
    // first match is written.
    var first = Array.isArray(spans) ? spans[0] : spans;
    if (first == null) {
      return;
    }

    fs.appendFile(file, target + '-[' + first.content + ']\n', function (err) {
      if (err) {
        console.error("Could not write coordinates for " + target, err);
      }
    });
  }, {"url": url, "css": ".geo"});
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment