Skip to content

Instantly share code, notes, and snippets.

@akiyoshi83
Created May 15, 2014 15:10
Show Gist options
  • Save akiyoshi83/641723bb5750db4744bd to your computer and use it in GitHub Desktop.
Save akiyoshi83/641723bb5750db4744bd to your computer and use it in GitHub Desktop.
CasperJS scraping sample
var system = require('system');
var fs = require('fs');

// system.args comes from the PhantomJS `system` module. This script assumes it
// is launched through the casperjs wrapper, where the entries from index 3
// onward are the script name followed by the script's own arguments.
// NOTE(review): the offset 3 depends on how the wrapper invokes phantomjs --
// confirm against the installed casperjs version.
var args = system.args;
var _args = args.slice(3);

/**
 * Split an http(s) URL into the pieces this script needs.
 *
 * @param {string} url - URL given on the command line.
 * @returns {{domain: string, path: string, fname: string}|null} The host part,
 *   the remainder of the URL, and a local file name derived from the last
 *   path segment ('index.html' when there is none); null when `url` does not
 *   contain an http(s) URL.
 */
function parseUrl(url) {
  var m = url.match(/https?:\/\/([^\/]+)(\/?.*)/);
  if (!m) {
    return null;
  }
  // Drop the query string and fragment, then keep only the last path segment
  // so the result is usable as a plain file name in the current directory.
  var segments = m[2].split(/[?#]/)[0].split('/');
  var last = segments[segments.length - 1];
  if (!last || last === '.' || last === '..') {
    last = 'index.html';
  }
  return { domain: m[1], path: m[2], fname: last };
}

// parse arguments
// --------------------
var url = _args[1] || '';
var parsed = _args.length < 2 ? null : parseUrl(url);
var domain, path, fname;
var casper;

if (!parsed) {
  // phantom.exit() does not stop the script immediately, so everything that
  // needs a valid URL lives in the else branch instead of after this call.
  console.log("USAGE: casperjs scrape.js URL");
  phantom.exit(1);
} else {
  domain = parsed.domain;
  path = parsed.path;
  fname = parsed.fname;

  console.debug('domain: ' + domain);
  console.debug('path: ' + path);
  console.debug('fname: ' + fname);

  // scraping by casperjs
  // --------------------
  casper = require('casper').create();

  casper.start(url, function() {
    this.evaluate(function() {
      // Force a white page background so the screenshot is not transparent.
      document.body.bgColor = 'white';
    });

    // TODO make directory hierarchy
    // Screenshot goes to "<fname>.png", page HTML to "<fname>".
    this.capture(fname + '.png');
    fs.write(fname, this.getHTML(), 'w');

    // TODO download resources
    this.exit();
  });

  casper.run();
}
# Run scrape.js under a Shift_JIS Japanese locale.
# Usage: <this script> URL
#
# The locale variables are exported so the casperjs child process inherits
# them; a plain assignment only sets a shell variable unless the name was
# already exported.
LANG="ja_JP.SJIS"
LC_ALL="ja_JP.SJIS"
export LANG LC_ALL

# Quote the URL so characters such as '?', '*' and '&' reach casperjs intact
# instead of being subject to globbing and word splitting.
casperjs scrape.js "$1"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment