Created
March 19, 2017 20:12
-
-
Save andreasasprou/10345eedc92909f43e7665146d407706 to your computer and use it in GitHub Desktop.
Minimal Facebook profile scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var fs = require('fs');

// CasperJS instance used by every step below; image and plugin loading are
// switched off in pageSettings.
var casper = require('casper').create({
    verbose: true,
    userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36",
    pageSettings: {
        loadImages: false,
        loadPlugins: false
    }
});

// Command-line options: --username=<profile to scrape>
var args = {
    username: casper.cli.get('username')
};

// Without a username the profile URL built later would end in "undefined/",
// so report the usage and stop instead of scraping a meaningless page.
if (args.username === undefined || args.username === '') {
    casper.echo('Usage: casperjs <script> --username=<facebook username>').exit(1);
}

// Cookies saved by a previous run are restored before the first request.
var cookiesPath = 'cookies/cookies.txt';
loadCookies(cookiesPath);

var start = Date.now(); // To output time taken

// Credentials typed into the login form; the values here look like
// placeholders and need replacing before a real run.
var email = '[email protected]';
var password = 'facebook password';

// Load the page with a wide display for debugging purposes
casper.start('https://www.facebook.com/login.php')
    .viewport(1600, 1000);
// Step: echo the page title and, when it is not 'Facebook', submit the
// login form and persist the resulting cookies.
casper.then(function() {
    var title = this.getTitle();
    casper.echo(title);
    // If cookies have expired
    if (title !== 'Facebook') {
        casper.waitForSelector('#login_form', function() {
            casper.echo('Logging in');
            this.sendKeys('input[name=email]', email);
            this.sendKeys('input[name=pass]', password);
            this.click('button[name=login]');
            // Save cookies only after the login form has disappeared.
            // Saving right after the click would write the cookies of the
            // still-logged-out session.
            casper.waitWhileSelector('#login_form', function() {
                saveCookies(cookiesPath);
            });
        });
    }
});
// Open the users page
casper.thenOpen('https://www.facebook.com/' + args.username + '/');
// Once the intro card is present, collect its items in the page context and
// print the non-empty ones.
casper.waitForSelector('#profile_intro_card', function() {
    // Fall back to an empty list so a null/undefined evaluate() result
    // (e.g. the page-side function failing) does not throw on .filter.
    var items = this.evaluate(getProfileItems) || [];
    console.log(items.filter(function(item) {
        return item !== '';
    }));
});
/**
 * Collects the innerText of every '#profile_intro_card ._5xu4' element in
 * the current document. Entries whose text is exactly 'Details' are
 * replaced by an empty string.
 *
 * @returns {Array} one string per matched element, in document order
 */
function getProfileItems() {
    var nodes = document.querySelectorAll('#profile_intro_card ._5xu4');
    var texts = [];
    for (var i = 0; i < nodes.length; i++) {
        var label = nodes[i].innerText;
        texts.push(label === 'Details' ? '' : label);
    }
    return texts;
}
// Run the queued steps, then print the elapsed wall-clock time and exit.
casper.run(function() {
    var elapsedSeconds = (Date.now() - start) / 1000;
    console.log('Time taken: ', elapsedSeconds, 's');
    this.exit();
});
/**
 * Debugging helper to capture the current screen.
 *
 * Calls casper.capture with a clip rectangle anchored at the top-left corner.
 *
 * @param {Object} casper   object exposing capture(filename, clipRect)
 * @param {string} filename target image path
 * @param {number} [width]  clip width in pixels; defaults to 1600
 * @param {number} [height] clip height in pixels; defaults to 1000
 */
function captureLarge(casper, filename, width, height) {
    casper.capture(filename, {
        top: 0,
        left: 0,
        width: width === undefined ? 1600 : width,
        height: height === undefined ? 1000 : height
    });
}
/**
 * Serializes phantom.cookies as JSON and writes it to cookiesPath.
 *
 * The parent directory (the part of cookiesPath before the last '/') is
 * created first when it does not exist, so the first run does not fail on a
 * missing folder.
 *
 * @param {string} cookiesPath destination file path, '/'-separated
 */
function saveCookies(cookiesPath) {
    var cookies = JSON.stringify(phantom.cookies);
    var slash = cookiesPath.lastIndexOf('/');
    var dir = slash > 0 ? cookiesPath.substring(0, slash) : '';
    if (dir !== '' && !fs.isDirectory(dir)) {
        fs.makeTree(dir);
    }
    // The third argument of fs.write is an open mode string, not a Unix
    // permission number; 'w' writes (truncating) the file.
    fs.write(cookiesPath, cookies, 'w');
}
/**
 * Restores phantom.cookies from the JSON file at cookiesPath.
 *
 * Does nothing when the file does not exist (e.g. on the first run). A file
 * that cannot be parsed as JSON is reported and ignored, leaving
 * phantom.cookies untouched.
 *
 * @param {string} cookiesPath path of the cookie file to read
 */
function loadCookies(cookiesPath) {
    if (!fs.isFile(cookiesPath)) {
        return;
    }
    var data = fs.read(cookiesPath);
    try {
        phantom.cookies = JSON.parse(data);
    } catch (err) {
        console.log('Ignoring unreadable cookie file ' + cookiesPath + ': ' + err.message);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment