Skip to content

Instantly share code, notes, and snippets.

@gasolin
Created April 23, 2014 16:17
Show Gist options
  • Select an option

  • Save gasolin/11221952 to your computer and use it in GitHub Desktop.

Select an option

Save gasolin/11221952 to your computer and use it in GitHub Desktop.
var http = require("http");
var cheerio = require("cheerio");
/**
 * Downloads a URL over HTTP and hands the response body to a callback.
 *
 * The response stream is decoded as UTF-8 by the stream itself, so a
 * multi-byte character that is split across two network chunks is put back
 * together instead of being turned into replacement characters.
 *
 * @param {string} url - Address to fetch with http.get.
 * @param {function(?string)} callback - Invoked exactly once: with the body
 *   text for a 2xx response, or with null when the request fails, the
 *   response stream errors, or the status code is not 2xx.
 */
function download(url, callback) {
  var finished = false;

  // Several events (end, response error, request error) can signal
  // completion; only the first one is allowed to reach the caller.
  function finish(result) {
    if (finished) {
      return;
    }
    finished = true;
    callback(result);
  }

  http.get(url, function(res) {
    if (res.statusCode < 200 || res.statusCode >= 300) {
      // Discard the body so the stream is consumed; an error page is not
      // a usable document.
      res.resume();
      finish(null);
      return;
    }

    var data = "";
    // Without this, each Buffer chunk is stringified on its own and a
    // character straddling a chunk boundary is corrupted.
    res.setEncoding("utf8");
    res.on("data", function(chunk) {
      data += chunk;
    });
    res.on("end", function() {
      finish(data);
    });
    res.on("error", function() {
      finish(null);
    });
  }).on("error", function() {
    finish(null);
  });
}
// DO THE JOB
// Fetch the index page, then scrape every page it links to.
var url = "http://axe-level-1.herokuapp.com/lv2/";
// Records collected from the scraped pages; parse() appends to this array.
var resultJson = [];
// Keys given to the cells of a table row, in column order.
var column_title = ["town", "village", "name"];
download(url, function(data) {
  if (!data) {
    console.error('failed to download ' + url);
    return;
  }
  var $ = cheerio.load(data);
  $("a").each(function(i, e) {
    var href = $(e).attr("href");
    // An anchor without an href would otherwise become ".../lv2/undefined".
    if (href === undefined) {
      return;
    }
    // Links on the index page are treated as relative to the index URL.
    href = url + href;
    console.log('parse ' + href);
    parse(href);
  });
});
/**
 * Downloads one page and appends a record for each table row to the shared
 * resultJson array, then logs everything collected so far.
 *
 * Each record maps the names in column_title to the text of the row's
 * <td> cells, in column order.
 *
 * @param {string} path - Full URL of the page to scrape.
 */
function parse(path) {
  download(path, function(data) {
    if (data) {
      var $ = cheerio.load(data);
      $("tr").each(function(i, row) {
        var columns = $(row).find("td");
        // A row with no <td> cells carries no data; pushing an empty
        // object for it would pollute the result.
        if (columns.length === 0) {
          return;
        }
        var person = {};
        columns.each(function(j, cell) {
          // Cells past the known titles would otherwise all be stored
          // under the literal key "undefined".
          if (j < column_title.length) {
            person[column_title[j]] = $(cell).text();
          }
        });
        resultJson.push(person);
      });
    } else {
      console.error('failed to download ' + path);
    }
    // Runs once per page, so this prints the running total rather than a
    // single final result.
    console.log("=======");
    console.log(JSON.stringify(resultJson));
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment