Created
May 15, 2014 15:10
-
-
Save akiyoshi83/641723bb5750db4744bd to your computer and use it in GitHub Desktop.
CasperJS scraping sample
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// CasperJS scraping sample.
//
// Loads the URL given on the command line, saves a screenshot of the page
// as "<fname>.png" and its HTML as "<fname>", where <fname> is the last
// segment of the URL path (or "index.html" when the path has none).
//
// NOTE: this runs under PhantomJS, so the code is deliberately kept to ES5.
var system = require('system');
var fs = require('fs');

/**
 * Splits an http(s) URL into the pieces used by the scraper.
 *
 * @param {string} url - URL taken from the command line.
 * @returns {?{domain: string, path: string, fname: string}} The parsed
 *     pieces, or null when `url` does not contain an http(s) URL.
 */
function parseTarget(url) {
  var m = url.match(/https?:\/\/([^\/]+)(\/?.*)/);
  if (!m) {
    return null;
  }
  var path = m[2];
  // Drop the query string / fragment before picking the file name.
  var cleanPath = path.split(/[?#]/)[0];
  var lastSegment = cleanPath.substring(cleanPath.lastIndexOf('/') + 1);
  // "." and ".." are not usable as output file names.
  if (lastSegment === '.' || lastSegment === '..') {
    lastSegment = '';
  }
  return {
    domain: m[1],
    path: path,
    fname: lastSegment || 'index.html'
  };
}

/**
 * Script entry point: validates the arguments and runs the scrape.
 *
 * phantom.exit() does not stop the script that is currently executing, so
 * every error path returns explicitly right after requesting the exit.
 */
function main() {
  // system.args is the PhantomJS API; the original author's note says that
  // index 3 and later hold the arguments of casperjs (script name, then URL).
  var _args = system.args.slice(3);
  if (_args.length < 2) {
    console.log("USAGE: casperjs scrape.js URL");
    phantom.exit(1);
    return;
  }

  // parse arguments
  // --------------------
  var url = _args[1] || '';
  var target = parseTarget(url);
  if (!target) {
    console.log("casperjs scrape.js URL");
    phantom.exit(1);
    return;
  }
  var fname = target.fname;

  console.debug('domain: ' + target.domain);
  console.debug('path: ' + target.path);
  console.debug('fname: ' + fname);

  // scraping by casperjs
  // --------------------
  var casper = require('casper').create();

  casper.start(url, function() {
    this.evaluate(function() {
      // Force a white background so the capture is not transparent.
      document.body.bgColor = 'white';
    });

    // TODO make directory hierarchy
    this.capture(fname + '.png');
    fs.write(fname, this.getHTML(), 'w');

    // TODO download resources
    this.exit();
  });

  casper.run();
}

main();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Runs the CasperJS scraper with the ja_JP.SJIS locale.
# Usage: <this script> URL

# Export the locale variables so they are part of the environment of the
# casperjs child process, not just variables of this shell.
export LANG="ja_JP.SJIS"
export LC_ALL="ja_JP.SJIS"

# Quote the URL: characters such as '?' and '*' would otherwise be subject
# to pathname expansion, and whitespace to word splitting.
casperjs scrape.js "$1"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment