Created
December 13, 2012 14:39
-
-
Save ganglio/4276763 to your computer and use it in GitHub Desktop.
CasperJS: Facebook photos scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Options for the Casper instance: verbose output at the 'info' log level.
var casperOptions = {
    verbose: true,
    logLevel: 'info'
};
var casper = require('casper').create(casperOptions);

// Filled by the scraping step with the photo file names found on the page.
var images = [];

// File-system module; not referenced elsewhere in the visible script.
var fs = require("fs");
/**
 * Configuration.
 *
 * login_username / login_password are typed into the Facebook login form;
 * scraped_username is the profile whose photo page gets opened.
 * Replace the placeholder values before running the script.
 */
var login_username   = "username";
var login_password   = "password";
var scraped_username = "username2";
/**
 * Everything starts here!
 * First step: open the mobile Facebook site and submit the login form.
 * The mobile version is used because its DOM is far simpler to scrape.
 *
 * The page is requested over HTTPS (like the photo page opened in the next
 * step) so the login form the credentials are typed into is never delivered
 * over an unencrypted connection.
 */
casper.start('https://m.facebook.com', function () {
    // The pretty HUGE viewport allows for roughly 1200 images (author's estimate).
    // If more are needed, either enlarge the viewport or scroll the page so
    // that more of the lazily loaded DOM gets rendered (probably the better approach).
    this.viewport(2048, 4096);

    // Fill in the credentials; the trailing `true` submits the form.
    this.fill('form#login_form', {
        'email': login_username,
        'pass': login_password
    }, true);
});
/**
 * Second step: open the photo index of the scraped user, take a screenshot
 * for visual confirmation and collect the file name of every photo thumbnail
 * into the file-level `images` array.
 */
casper.thenOpen("https://m.facebook.com/" + scraped_username + "?v=photos", function () {
    // Wait four seconds so the lazily loaded page content has a chance to
    // appear (lazy loading is great for real users, less so for bots).
    this.wait(4000, function () {
        // Screenshot, just to be sure we are on the correct page.
        this.capture("photo_index.png");

        // The function below runs inside the page, so it must be self-contained.
        var found = this.evaluate(function () {
            var nodes = document.querySelectorAll(".timeline.photos i.img");
            var names = [];
            Array.prototype.forEach.call(nodes, function (node) {
                var match = /url\((.*)\)/.exec(node.style.backgroundImage);
                if (!match) {
                    // Element without a url(...) background: skip it instead of
                    // throwing and losing every other file name.
                    return;
                }
                // Some engines serialize the value as url("...") or url('...');
                // strip the optional quotes so they do not end up in the name.
                var address = match[1].replace(/^\s*["']|["']\s*$/g, "");
                // The file name is the last path segment of the URL.
                names.push(address.split("/").pop());
            });
            return names;
        });

        // Keep `images` an array even if the in-page evaluation yields nothing.
        images = found || [];
    });
});
/**
 * Last step: print the full-size URL of every collected photo, one per line.
 * TODO: downloading the images locally is still an open point (see the
 * original author's note).
 */
casper.then(function () {
    // Every collected file name is appended to this CDN prefix.
    var cdnPrefix = "https://fbcdn-photos-a.akamaihd.net/hphotos-ak-ash4/s720x720/";

    this.each(images, function (self, fileName) {
        this.echo(cdnPrefix + fileName);
    });
});

// Start executing the queued steps.
casper.run();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment