Created
October 28, 2015 13:58
-
-
Save petamoriken/f7c1158323331cc4f5d2 to your computer and use it in GitHub Desktop.
PhantomJS では page.evaluate では非同期的なことができないけど XMLHttpRequest($.ajax) を同期的に使うことでなんとかなった()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var phantom = require('node-phantom-async'); | |
var co = require('co'); | |
/**
 * Serializes a flat object into a URL query string (without the leading `?`).
 *
 * Keys and values are each passed through `encodeURIComponent`, joined as
 * `key=value`, and the pairs are joined with `&`. Values are stringified by
 * `encodeURIComponent`, so nested objects/arrays are not expanded.
 *
 * @param {Object} object - Own enumerable properties become query parameters.
 *   `null`/`undefined` is treated as "no parameters".
 * @returns {string} The encoded query string, or `''` when there is nothing to encode.
 */
const buildQuery = function (object) {
  // Object.keys() throws on null/undefined; an absent parameter bag is just an empty query.
  if (object == null) {
    return '';
  }

  return Object.keys(object)
    .map((key) => `${encodeURIComponent(key)}=${encodeURIComponent(object[key])}`)
    .join('&');
};
/**
 * Runs a CiNii full-text search in PhantomJS and downloads the `.bib` resource
 * linked from every result title on the first result page (up to 200 hits).
 *
 * @param {string|string[]} keywords - A search string, or a list of terms that
 *   is joined with single spaces.
 * @returns {Promise<Array>} Resolves with the response bodies of the `.bib`
 *   requests that succeeded, in result order. Failed downloads are skipped.
 * @throws {Error} (as a rejection) when the search page cannot be opened, or
 *   when any PhantomJS call rejects.
 */
const scrapeFromCiNii = function (keywords) {
  return co(function* () {
    const params = {
      q: typeof keywords === 'string' ? keywords : keywords.join(' '),
      count: 200,
    };

    const ph = yield phantom.create();
    try {
      const page = yield ph.createPage();

      const status = yield page.open(`http://ci.nii.ac.jp/fulltext?${buildQuery(params)}`);
      if (status === 'fail') {
        throw new Error('Failed to open the CiNii search page');
      }

      // The result page is scraped with jQuery, so inject it before evaluating.
      yield page.includeJs('https://code.jquery.com/jquery-2.1.4.min.js');

      // This callback runs inside the PhantomJS page, not in Node: it is kept in
      // plain ES5 (function/var) and must not reference Node-side variables.
      // page.evaluate() can only hand back what the callback returns
      // synchronously, so every download uses a blocking XHR (async: false)
      // to guarantee `bibs` is fully populated before the return.
      return yield page.evaluate(function () {
        var items = [];
        $('.item_title>a').each(function () {
          items.push($(this).attr("href") + ".bib");
        });

        var bibs = [];
        items.forEach(function (url) {
          $.ajax({
            async: false,
            url: url,
            type: "get"
          }).done(function (data) {
            bibs.push(data);
          });
        });
        return bibs;
      });
    } finally {
      // Always shut the PhantomJS process down, on success and on failure;
      // otherwise it is leaked. Promise.resolve() keeps the yield valid for co
      // whether or not exit() returns a promise.
      yield Promise.resolve(ph.exit());
    }
  });
};
// Script entry point: scrape the BibTeX records for the query "mptcp" and print them.
// The process is exited explicitly in both outcomes (presumably because a live
// PhantomJS bridge can keep Node's event loop running — confirm before removing).
scrapeFromCiNii('mptcp')
  .then((bibs) => {
    console.log('done');
    console.log(bibs);
    process.exit(0);
  })
  .catch((err) => {
    // Without this handler a failed scrape is silently dropped and the script
    // never signals the failure to the caller.
    console.error(err);
    process.exit(1);
  });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment