Last active
July 13, 2017 16:52
-
-
Save avovsya/42db2fefb8a12d1a7dca to your computer and use it in GitHub Desktop.
Web scraping with node and casper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// CasperJS script: opens the URL given on the command line, injects jQuery and
// the extractor library into the loaded page, runs the extraction rule inside
// the page and prints the result to stdout as one line of JSON.
var casper = require('casper').create({
    pageSettings: {
        // javascriptEnabled: false
    }
});
var system = require('system');
var options = casper.cli.options;

// Get arguments passed from the Node side (--url=..., --rule=..., --type=...)
var url = options.url;
var rule = JSON.parse(options.rule); // --rule arrives as a JSON string
var type = options.type; // read here but not used anywhere in this script

/**
 * Inject a local script file into the current page.
 * page.injectJs returns false when the injection fails; in that case report
 * the problem on stderr (stdout is reserved for the JSON result) and exit
 * with a non-zero status instead of carrying on without the library.
 *
 * @param {string} scriptPath - Path of the script file to inject.
 */
function injectOrExit(scriptPath) {
    if (!casper.page.injectJs(scriptPath)) {
        system.stderr.writeLine('Failed to inject ' + scriptPath);
        casper.exit(1);
    }
}

casper.start(url);

// Injecting scraping code along with supporting libraries
casper.then(function () {
    injectOrExit('./lib/jquery.js');
});

casper.then(function () {
    injectOrExit('./lib/jquery-extractor.js');
});

casper.then(function () {
    var result = this.evaluate(function (rule) {
        // This bit is evaluated in the context of a web page.
        // Everything that you injected is available here
        return {
            products: jqueryExtractor.extract(rule) // This returns data from page context back into the casper script
        };
    },
    { rule: rule }); // Passing arguments to the function given to this.evaluate

    system.stdout.writeLine(JSON.stringify(result)); // Dump result to the stdout.
});

casper.then(function () {
    casper.exit();
});

casper.run();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Run the CasperJS script in a child process and pass its outcome to `cb`.
 *
 * The request is forwarded to the script as command-line options:
 * `--url`, `--rule` (JSON-serialised) and `--type`.
 *
 * @param {Object} msg - Request; its `url`, `rule` and `type` properties are read.
 * @param {Function} cb - Invoked as `cb(null, err, stdout, stderr)`. The first
 *     argument is always null; the execFile error (if any) is the second one.
 */
function runCasperGetResults (msg, cb) {
    var casperPath = path.join(__dirname, '/casper.sh'); // path to the Casper.js executable
    var downloaderPath = path.join(__dirname, '/casper-bit.js'); // script executed by casper;
                                                                 // it should dump its result to stdout
    var casperArgs = [
        downloaderPath,
        '--url=' + msg.url,
        '--rule=' + JSON.stringify(msg.rule),
        '--type=' + msg.type
    ];

    // execFile is asynchronous: it buffers the child's stdout/stderr and calls
    // the callback once the process has finished. For streaming output, use
    // child_process.spawn and listen to events on the spawned process instead.
    childProcess.execFile(casperPath, casperArgs, function (err, stdout, stderr) {
        return cb(null, err, stdout, stderr);
    });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment