Created
April 11, 2013 16:24
-
-
Save dandrews/5364857 to your computer and use it in GitHub Desktop.
Scrape mentor details from http://www.techstars.com/program/mentors/#all using CasperJS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Listing page that links to every mentor's profile page.
var url = 'http://www.techstars.com/program/mentors/#all';

// CasperJS instance that drives the whole scrape.
var casper = require('casper').create({
    // Uncomment the next two options to turn on CasperJS's debug logging.
    // verbose: true,
    // logLevel: 'debug',
    pageSettings: {
        // The script only reads markup and attributes, so images and
        // plugins are not loaded.
        loadImages: false,
        loadPlugins: false
    }
});
/**
 * Turn a backtrace into printable text.
 *
 * Frames are expected to be objects carrying `file`, `line` and `function`
 * (the shape PhantomJS reports to its onError callbacks); echoing such an
 * array directly would only print "[object Object]". Anything that does not
 * match that shape is stringified unchanged.
 *
 * @param {*} backtrace - Backtrace value handed to the event listener.
 * @returns {string} One line per frame, or the stringified value.
 */
function formatBacktrace(backtrace) {
    if (!Array.isArray(backtrace)) {
        return String(backtrace);
    }
    return backtrace.map(function(frame) {
        if (frame === null || typeof frame !== 'object') {
            return String(frame);
        }
        // Bracket access: `function` is a reserved word.
        var fnName = frame['function'] ? frame['function'] + ' ' : '';
        return '  at ' + fnName + '(' + frame.file + ':' + frame.line + ')';
    }).join('\n');
}

/**
 * Build an event listener that prints an error report framed by separator
 * lines. The listener relies on being invoked with `this` bound to an object
 * that has an `echo` method.
 *
 * @param {string} label - Heading printed above the message.
 * @returns {function(*, *)} Listener taking (msg, backtrace).
 */
function makeErrorReporter(label) {
    return function(msg, backtrace) {
        this.echo("=========================");
        this.echo(label);
        this.echo(msg);
        this.echo(formatBacktrace(backtrace));
        this.echo("=========================");
    };
}

// Errors raised by the script itself.
casper.on('error', makeErrorReporter("ERROR:"));

// Errors raised by JavaScript running inside the loaded page.
casper.on("page.error", makeErrorReporter("PAGE.ERROR:"));
// Links to the individual mentor pages; starts empty and is reassigned once
// the listing page has been evaluated.
var mentors = [];
/**
 * Read the mentor's name from the current document.
 *
 * Passed to casper.evaluate(), so it executes in the page and has to be
 * self-contained: it only touches the page's own `document`.
 *
 * @returns {?string} innerText of the first `h1.entry-title a` element, or
 *     null when the document has no such element.
 */
function getMentorName() {
    var titleLink = document.querySelector('h1.entry-title a');
    // querySelector yields null on no match; do not dereference it.
    return titleLink ? titleLink.innerText : null;
}
/**
 * Read the mentor's picture location from the current document.
 *
 * Passed to casper.evaluate(), so it executes in the page and has to be
 * self-contained: it only touches the page's own `document`.
 *
 * @returns {?string} Raw `src` attribute of the first
 *     `header.entry-header img` element (null if the attribute is absent),
 *     or null when the document has no such element.
 */
function getMentorImg() {
    var picture = document.querySelector('header.entry-header img');
    // querySelector yields null on no match; do not dereference it.
    return picture ? picture.getAttribute('src') : null;
}
/**
 * Read the mentor's description from the current document.
 *
 * Passed to casper.evaluate(), so it executes in the page and has to be
 * self-contained: it only touches the page's own `document`.
 *
 * @returns {?string} innerHTML (markup included) of the first
 *     `div.entry-content` element, or null when the document has no such
 *     element.
 */
function getMentorDesc() {
    var content = document.querySelector('div.entry-content');
    // querySelector yields null on no match; do not dereference it.
    return content ? content.innerHTML : null;
}
/**
 * Collect the link of every mentor entry in the current document.
 *
 * Passed to casper.evaluate(), so it executes in the page and has to be
 * self-contained: it only touches the page's own `document`.
 *
 * For each `li.mentor` element the raw `href` attribute of its first `<a>`
 * is taken. Entries without an anchor, or whose anchor has no/empty `href`,
 * are skipped so that a single malformed entry does not discard the rest.
 *
 * NOTE(review): the attribute value is returned as written in the markup;
 * if the site uses relative links they are not resolved here - confirm.
 *
 * @returns {string[]} The collected href values, in document order.
 */
function getMentors() {
    var entries = document.querySelectorAll('li.mentor');
    var links = [];
    for (var i = 0; i < entries.length; i++) {
        var anchor = entries[i].getElementsByTagName('a')[0];
        var href = anchor ? anchor.getAttribute('href') : null;
        if (href) {
            links.push(href);
        }
    }
    return links;
}
// Step 0: load the listing page.
casper.start(url);

// Step 1: collect the mentor links from the listing page.
casper.then(function() {
    var found = this.evaluate(getMentors);
    // Guard against a non-array result (for example when the in-page
    // function failed) so the next step never iterates over a non-array.
    if (Array.isArray(found)) {
        mentors = found;
    } else {
        mentors = [];
    }
    if (mentors.length === 0) {
        // log() rather than echo() keeps this notice out of the scraped
        // output unless verbose logging is enabled.
        this.log('No mentor links found on ' + url, 'warning');
    }
});

// Step 2: queue one navigation per mentor link and print one record for each.
casper.then(function() {
    this.each(mentors, function(self, mentor) {
        self.thenOpen(mentor, function() {
            var mentorName = this.evaluate(getMentorName);
            var mentorImg = this.evaluate(getMentorImg);
            var mentorDesc = this.evaluate(getMentorDesc);
            // NOTE(review): values are wrapped in double quotes but not
            // escaped, and the description is raw innerHTML, so embedded
            // quotes or newlines will break naive parsing of this output.
            // Left as-is to keep the output format unchanged - confirm the
            // intended consumer before altering it.
            this.echo('"' + mentorName + '", "' + mentorImg + '", "' + mentorDesc + '",');
        });
    });
});

// Execute the queued steps, then terminate the process.
casper.run(function() {
    this.exit();
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
casperjs mentors.js 2>&1 | tee mentors.txt
#webscraping #phantomjs #casperjs #techstars