Skip to content

Instantly share code, notes, and snippets.

@avovsya
Last active July 13, 2017 16:52
Show Gist options
  • Save avovsya/42db2fefb8a12d1a7dca to your computer and use it in GitHub Desktop.
Save avovsya/42db2fefb8a12d1a7dca to your computer and use it in GitHub Desktop.
Web scraping with node and casper
/*
 * CasperJS-side scraping script. It runs under the CasperJS/PhantomJS runtime,
 * not under Node.
 *
 * Command-line options read from casper.cli.options:
 *   --url   address of the page to open
 *   --rule  JSON-encoded rule passed to jqueryExtractor.extract() in the page
 *   --type  read from the options but not used anywhere in this script
 *
 * On success a single line of JSON of the form {"products": ...} is written
 * to stdout. If the options are unusable or a library cannot be injected, a
 * message is written to stderr and the script exits with status 1, so that
 * stdout only ever carries the JSON result.
 */
var casper = require('casper').create({
    pageSettings: {
        // javascriptEnabled: false
    }
});
var system = require('system');
var options = casper.cli.options;

// Get arguments passed from node-bit.js
var url = options.url;
var type = options.type;
var rule = null;

// Set once a fatal problem was reported; later steps check it and do nothing.
var aborted = false;

// Reports a fatal problem on stderr and asks CasperJS to exit with status 1.
function abort(message) {
    aborted = true;
    system.stderr.writeLine(message);
    casper.exit(1);
}

// Injects one script file into the current page; aborts the run if
// injectJs() reports failure instead of letting the extraction step fail later.
function injectLibrary(libraryPath) {
    if (aborted) {
        return;
    }
    if (!casper.page.injectJs(libraryPath)) {
        abort('Unable to inject ' + libraryPath);
    }
}

// JSON.parse throws on a missing or malformed value; turn that into a
// readable error instead of an uncaught exception at the top level.
try {
    rule = JSON.parse(options.rule);
} catch (parseError) {
    abort('Invalid --rule option (expected JSON): ' + parseError.message);
}

if (!aborted && !url) {
    abort('Missing --url option');
}

if (!aborted) {
    casper.start(url);

    // Injecting scraping code along with supporting libraries
    casper.then(function () {
        injectLibrary('./lib/jquery.js');
    });

    casper.then(function () {
        injectLibrary('./lib/jquery-extractor.js');
    });

    casper.then(function () {
        if (aborted) {
            return;
        }
        var result = this.evaluate(function (rule) {
            // This bit is evaluated in the context of a web page.
            // Everything that you injected is available here
            return {
                products: jqueryExtractor.extract(rule) // This would return data from page context back into casper script
            };
        },
        { rule: rule }); // Passing arguments to function in this.evaluate
        system.stdout.writeLine(JSON.stringify(result)); // Dump result to the stdout.
    });

    casper.then(function () {
        // When a step already aborted, the exit with status 1 has been requested.
        if (!aborted) {
            casper.exit();
        }
    });

    casper.run();
}
/**
 * Launches the CasperJS scraping script as a child process and hands its
 * outcome to the callback.
 *
 * The call is asynchronous: execFile starts the process and returns, and the
 * callback fires once the process has finished, with its buffered output.
 *
 * @param {Object} msg - Scrape request; msg.url, msg.rule and msg.type are
 *     forwarded as the --url, --rule (JSON-encoded) and --type options.
 * @param {Function} cb - Called as cb(null, err, stdout, stderr). The first
 *     argument is always null; the execFile error, if any, arrives second.
 *     NOTE(review): presumably deliberate so the consumer inspects the failure
 *     itself - confirm against the caller.
 */
function runCasperGetResults (msg, cb) {
    var baseDir = __dirname;
    // Path to the Casper.js executable
    var launcher = path.join(baseDir, '/casper.sh');
    // Script executed by casper; it should dump its result to stdout
    var scraperScript = path.join(baseDir, '/casper-bit.js');

    var cliArguments = [scraperScript].concat([
        '--url=' + msg.url,
        "--rule=" + JSON.stringify(msg.rule),
        '--type=' + msg.type
    ]);

    // Forward the child's outcome unchanged, behind a leading null.
    function onFinished (err, stdout, stderr) {
        return cb(null, err, stdout, stderr);
    }

    // child_process.spawn could be used instead to stream output through events.
    childProcess.execFile(launcher, cliArguments, onFinished);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment