Skip to content

Instantly share code, notes, and snippets.

@KATT
Created March 4, 2014 13:58
Show Gist options
  • Save KATT/9346937 to your computer and use it in GitHub Desktop.
Save KATT/9346937 to your computer and use it in GitHub Desktop.
CasperJS, scrape a FB message history for images and save the URLs in a file
// Single Casper instance shared by every step in this script.
var casper = require('casper').create({
  logLevel: 'debug',
  verbose: true,
  // The WebPage instance used by Casper will use these settings.
  pageSettings: {
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.4',
    loadPlugins: false,
    loadImages: false
  }
});
// Script-level state; neither array is read or written in the code visible here.
var images = [];
var msgs = [];

// File-system module, used to persist progress and results between runs.
var fs = require("fs");

/**
 * Configuration here:
 * replace both placeholders with the credentials of the account to log in with.
 */
var login_username = "login_username";
var login_password = "login_password";
/**
 * Everything starts here!
 * I use the mobile version of facebook as the DOM is waaay simpler to scrape.
 * The entry URL is HTTPS so the first hop of the login flow is never made in
 * clear text.
 */
casper.start('https://m.facebook.com', function() {
  this.viewport(320, 4096);
  // Fill in the login form with the configured credentials; the trailing
  // `true` asks Casper to submit the form after filling it.
  this.fill('form#login_form', {
    'email': login_username,
    'pass': login_password
  }, true);
  // Screenshot taken right after the form submission is triggered.
  this.capture("photo_index.png");
});
/**
 * Collects the href of every <a> element in the current document whose URL
 * contains the text "attachment".
 *
 * This function is handed to `evaluate`, so it must stay self-contained: it
 * may only rely on the page's `document`, not on variables of this script.
 *
 * @returns {string[]} Matching link URLs in document order (empty if none).
 */
var findAttachments = function findAttachments() {
  var aNodes = document.querySelectorAll('a');
  var list = [];
  // Walk by index: `for...in` over a NodeList also visits non-element keys
  // such as `length` and `item`, which are not links.
  for (var i = 0; i < aNodes.length; i++) {
    var href = aNodes[i].href;
    if (/attachment/.test(href)) {
      list.push(href);
    }
  }
  return list;
};
/**
 * Step callback for an attachment page: only records, at info level, that the
 * response arrived. Must be invoked with `this` bound to an object exposing
 * `log(message, level)`.
 */
var handleAttachment = function handleAttachment() {
  var level = 'info';
  this.log('handling attachment response', level);
};
/**
 * Step callback for one page of the message history.
 *
 * Queues a follow-up step that:
 *   1. appends the page's attachment URLs to attachments.txt (one per line),
 *   2. looks up the "see older" link,
 *   3. stores that link in next.txt as the resume point and opens it with
 *      this same callback, so the history is walked page by page.
 * When no "see older" link is found an error is logged, next.txt is left
 * untouched and the walk stops.
 *
 * @param {*} document Argument supplied by the caller of this step; unused.
 *     Note that `document` inside the evaluate callback below refers to the
 *     page's DOM, not to this parameter.
 */
var handle = function(document) {
  this.log('handling message response', 'info');
  casper.then(function() {
    // Save attachments first so the final page's links are kept even when
    // there is nothing older to continue with.
    var attachments = this.evaluate(findAttachments) || [];
    if (attachments.length > 0) {
      // Trailing newline keeps the last URL of this page from fusing with the
      // first URL appended for the next page.
      fs.write('attachments.txt', attachments.join("\n") + "\n", 'a');
    }
    var nextUrl = this.evaluate(function() {
      // Guard each lookup: the container or its link may be absent.
      var older = document.getElementById('see_older');
      var link = older ? older.querySelector('a') : null;
      return link ? link.href : null;
    });
    if (!nextUrl) {
      this.log('something happened!', 'error');
      return;
    }
    // Only overwrite the resume point once a usable URL is in hand.
    fs.write('next.txt', nextUrl, 'w');
    casper.thenOpen(nextUrl, handle);
  });
  this.capture("message.png");
};
// Resume point: next.txt holds the URL of the conversation page to open first.
// It is seeded by hand before the first run and rewritten after every page.
// Surrounding whitespace (e.g. a trailing newline from an editor) is stripped
// so it does not end up inside the URL.
var next = fs.exists('next.txt') ? fs.read('next.txt').trim() : '';
if (next) {
  casper.thenOpen(next, handle);
  casper.run();
} else {
  // Without a start URL there is nothing to scrape; stop with an error
  // instead of logging in and opening an empty address.
  casper.die('next.txt is missing or empty: put the conversation URL in it first', 1);
}
https://m.facebook.com/messages/read/?tid=[facebook-conversation-id]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment