Created
March 4, 2014 13:58
-
-
Save KATT/9346937 to your computer and use it in GitHub Desktop.
CasperJS script that scrapes a Facebook message history for image attachments and saves their URLs to a file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * The Casper instance shared by the whole script.
 *
 * Logging is verbose at 'debug' level. The underlying WebPage is told not to
 * load images or plugins and sends a desktop Chrome user-agent string.
 */
var casper = require('casper').create({
    logLevel: 'debug',
    verbose: true,
    // Settings applied to the WebPage instance that Casper drives.
    pageSettings: {
        userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.4',
        loadPlugins: false,
        loadImages: false
    }
});
// Module-level accumulators; nothing in the visible script fills them.
var images = [];
var msgs = [];

// File-system module, used to read and write next.txt / attachments.txt.
var fs = require("fs");

/**
 * Configuration: replace both placeholders with the credentials of the
 * account whose message history should be scraped.
 */
var login_username = "login_username";
var login_password = "login_password";
/**
 * Everything starts here!
 *
 * The mobile version of Facebook is used because its DOM is far simpler to
 * scrape. The login page is requested over HTTPS (not plain HTTP) because the
 * very next thing this step does is fill in and submit the account
 * credentials.
 */
casper.start('https://m.facebook.com', function() {
    // Narrow, very tall viewport for the mobile layout.
    this.viewport(320, 4096);
    // Fill the login form; the trailing `true` asks Casper to submit it.
    this.fill('form#login_form', {
        'email': login_username,
        'pass': login_password
    }, true);
    // Screenshot taken right after submitting, useful for debugging the login.
    this.capture("photo_index.png");
});
/**
 * Collects the href of every <a> element on the current page whose URL
 * contains the substring "attachment".
 *
 * This function is handed to `casper.evaluate`, so it must stay
 * self-contained: it may only use the global `document` and its own locals.
 *
 * @returns {string[]} Matching hrefs in document order (empty when none match).
 */
var findAttachments = function findAttachments() {
    var aNodes = document.querySelectorAll('a');
    var list = [];
    // Index loop rather than for...in: for...in on an array-like also visits
    // non-index enumerable properties, which are not anchor elements.
    for (var i = 0; i < aNodes.length; i++) {
        var href = aNodes[i].href;
        // Skip anchors whose href is not a plain string.
        if (typeof href === 'string' && /attachment/.test(href)) {
            list.push(href);
        }
    }
    return list;
};
/**
 * Step callback for an attachment response; it only emits an info-level log
 * entry through the `log` method of whatever object it is invoked on.
 */
var handleAttachment = function handleAttachment() {
    var message = 'handling attachment response';
    var level = 'info';
    this.log(message, level);
};
/**
 * Step callback run for every opened conversation page.
 *
 * It queues a follow-up step that:
 *   1. reads the URL of the "see older" link from the page,
 *   2. appends the page's attachment URLs to attachments.txt,
 *   3. stops (with an error log) when there is no older page, otherwise
 *      records the URL in next.txt as the resume point and opens it with
 *      this same callback.
 * A screenshot of the page is saved to message.png.
 *
 * @param document Unused; kept so the signature stays unchanged.
 *     NOTE(review): CasperJS appears to pass the response object here - confirm.
 */
var handle = function(document) {
    this.log('handling message response', 'info');

    casper.then(function() {
        // Runs inside the page; returns null instead of throwing when the
        // "see older" container or its link is absent (e.g. on the last page).
        var nextUrl = this.evaluate(function() {
            var container = document.getElementById('see_older');
            var link = container ? container.querySelector('a') : null;
            return link ? link.href : null;
        });

        // evaluate() may yield null if the page-side call fails.
        var attachments = this.evaluate(findAttachments) || [];
        if (attachments.length > 0) {
            // Trailing newline keeps URLs from consecutive pages on separate
            // lines, since the file is opened in append mode each time.
            fs.write('attachments.txt', attachments.join("\n") + "\n", 'a');
        }

        // Check the URL just scraped, and do it before touching next.txt so
        // the saved resume point is never overwritten with an empty value.
        if (!nextUrl) {
            this.log('something happened!', 'error');
            return;
        }

        fs.write('next.txt', nextUrl, 'w');
        casper.thenOpen(nextUrl, handle);
    });

    this.capture("message.png");
};
// Resume point: next.txt holds the URL of the conversation page to open
// first. It is created by hand for the first run and rewritten by `handle`
// as the crawl walks back through the history.
// A missing file yields '' instead of an exception, and surrounding
// whitespace (such as an editor-added trailing newline) is stripped so it
// does not end up inside the URL.
var next = fs.exists('next.txt') ? fs.read('next.txt').trim() : '';

if (next) {
    casper.thenOpen(next, handle);
} else {
    // Nothing to crawl: report why instead of opening an empty URL.
    casper.log('next.txt is missing or empty; put the conversation URL in it', 'error');
}

casper.run();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
https://m.facebook.com/messages/read/?tid=[facebook-conversation-id] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment