gelendir · November 27, 2010 18:35
diff --git a/infb.css b/infb.css
 /*
 	Ruel Pagayon (c) 2010 - ruel@ruel.me
 	
 	Cascading Style Sheet for InFB Log Output.
 */
 /* Page defaults: dark background, light text, very small base type. */
 body {
 	background-color: #3C3C3C;
 	color: #FFF;
 	margin-top: 50px;
 	margin-left: 25px;
 	font-size: xx-small;
 	/* "sans" is not a CSS generic family; "sans-serif" is the valid fallback keyword. */
 	font-family: Calibri, Arial, sans-serif;
 }

 /* Wrapper <div class="rby"> around the friends table. */
 .rby {
 	text-align: center;
 	font-size: xx-small;
 }

 table {
 	text-align: center;
 }

 /* Table cells: left-aligned text with padding on every side. */
 td {
 	padding-top: 0.5em;
 	padding-bottom: 0.5em;
 	padding-left: 1em;
 	padding-right: 1em;
 	text-align: left;
 	font-size: small;
 }

 /* Friend id column. */
 td.num {
 	color: #CCC;
 }

 /* Phone number column. */
 td.cnum {
 	color: #AFAFAF;
 }

 /* Links are bold white and only underlined while hovered. */
 a:active, a:visited, a:link {
 	color: #FFF;
 	font-weight: bold;
 	text-decoration: none;
 }

 a:hover {
 	color: #FFF;
 	font-weight: bold;
 	text-decoration: underline;
 }
diff --git a/infb.py b/infb.py
 #!/usr/bin/python
 #
 #	InFB - Information Facebook
 #	Usage: infb.py user@domain.tld [password]
 #	http://ruel.me
 #
 #	Copyright (c) 2010, Ruel Pagayon - ruel@ruel.me
 #	All rights reserved.
 #
 #	Redistribution and use in source and binary forms, with or without
 #		* Redistributions of source code must retain the above copyright
 #		  notice, this list of conditions and the following disclaimer.
 #		* Redistributions in binary form must reproduce the above copyright
 #		  notice, this list of conditions and the following disclaimer in the
 #		  documentation and/or other materials provided with the distribution.
 #		* Neither the name of ruel.me nor the names of its contributors
 #		  may be used to endorse or promote products derived from this
 #		  script without specific prior written permission.
 #
 #	THIS SCRIPT IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 #	ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 #	WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 #	DISCLAIMED. IN NO EVENT SHALL RUEL PAGAYON BE LIABLE FOR ANY
 #	DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 #	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 #	LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 #	ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 #	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 #	SCRIPT, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


 import sys, re, urllib, urllib2, cookielib, HTMLParser, getpass

 class FormScraper(HTMLParser.HTMLParser):
    """
    Scrapes the Facebook login page for form values that need to be submitted on login.
    Necessary because the form values change each time the login page is loaded.

    Only ``<input type="hidden">`` elements that appear inside the form whose
    ``id`` is ``login_form`` are collected. Forms and inputs that lack the
    attributes being inspected are ignored instead of raising ``KeyError``.

    Usage:
    form_scraper = FormScraper()
    form_scraper.feed(html_from_facebook)
    form_values = form_scraper.values

    Attributes:
    in_form -- True while the parser is between the login form's start and end tags.
    values  -- list of (name, value) tuples, in document order.
    """

    def __init__(self, *args, **kwargs):
        HTMLParser.HTMLParser.__init__(self, *args, **kwargs)
        self.in_form = False
        self.values = []

    def handle_starttag(self, tag, attrs):
        """Track entry into the login form and record its hidden inputs."""
        tag = tag.lower()
        attrs = dict(attrs)

        # .get() is used throughout: other forms on the page need not carry an
        # id, and inputs need not carry type/name/value attributes.
        if tag == 'form' and attrs.get('id') == 'login_form':
            self.in_form = True
        elif self.in_form and tag == 'input':
            # The parser reports a valueless attribute as None, hence "or ''".
            if (attrs.get('type') or '').lower() != 'hidden':
                return
            name = attrs.get('name')
            if name is None:
                # An input without a name is never submitted with the form.
                return
            self.values.append((name, attrs.get('value') or ''))

    def handle_endtag(self, tag):
        """Leave collection mode when the login form is closed."""
        if tag.lower() == 'form' and self.in_form:
            self.in_form = False

 def main():
    """
    Log in to Facebook and write friends' contact details to an HTML report.

    The account e-mail is read from ``sys.argv[1]`` and the password from
    ``sys.argv[2]``; when the password is omitted it is requested with
    ``getpass``. After logging in, the mobile friends list is walked by
    advancing the ``f`` offset in steps of 10 while the listing contains
    "Next". For every friend whose info page shows a name and an e-mail
    address, a summary line is printed and a table row is written to
    ``<user>.html`` in the current directory.

    Exits with status 1 (through ``usage``) when no e-mail argument is given
    and with status 2 when the login response URL does not end in ``home.php``.
    """
    if len(sys.argv) < 2:
        usage()

    user = sys.argv[1]

    # Prompt for the password when it is not given as an argument.
    if len(sys.argv) < 3:
        passw = getpass.getpass("Enter password: ")
    else:
        passw = sys.argv[2]

    # Build an opener that keeps cookies between requests and install it as
    # the urllib2 default.
    CHandler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
    browser = urllib2.build_opener(CHandler)
    browser.addheaders = [('User-agent', 'InFB - ruel@ruel.me - http://ruel.me')]
    urllib2.install_opener(browser)

    # Retrieve login form data and initialize the cookies
    print('Initializing..')
    res = browser.open('https://www.facebook.com/login.php')

    # Determine string encoding from the Content-Type header (default UTF-8)
    content_type = res.info()['Content-Type'].split('; ')
    encoding = 'utf-8'
    if len(content_type) > 1 and content_type[1].startswith('charset'):
        encoding = content_type[1].split('=')[1]
    html = unicode(res.read(), encoding=encoding)
    res.close()

    # Scrape form for hidden inputs, add email and password to values
    form_scraper = FormScraper()
    form_scraper.feed(html)
    form_data = form_scraper.values
    form_data.extend([('email', user), ('pass', passw)])

    def to_bytes(text):
        # urlencode() expects byte strings, so the scraped unicode values are
        # encoded. Values that are already byte strings (the e-mail and
        # password) are passed through untouched: calling .encode() on a
        # non-ASCII byte string raises UnicodeDecodeError.
        if isinstance(text, unicode):
            return text.encode(encoding)
        return text

    form_data = [(to_bytes(key), to_bytes(value)) for key, value in form_data]
    data = urllib.urlencode(form_data)

    # Login
    print('Logging in to account ' + user)
    res = browser.open('https://login.facebook.com/login.php?login_attempt=1', data)
    print(res.code)
    landed_url = res.url
    print(landed_url)
    # Close the response before a possible exit so it is released either way.
    res.close()
    if not re.search(r'home\.php$', landed_url):
        print('Login Failed')
        sys.exit(2)

    # Patterns used while scraping, compiled once outside the loops.
    friend_id_re = re.compile(r'"\/friends\.php\?id=([0-9]+)&')
    name_re = re.compile(r'<div id="body"><div><div>(.*?)<\/div>')
    tel_re = re.compile(r'href="tel:(.*?)"')
    email_re = re.compile(r'Emails?:<\/div><\/td><td valign="top"><div>(.*?)<\/div>')

    # Get Emails and Phone Numbers
    print("Getting Info..\n")
    # NOTE(review): append mode means a second run for the same user adds
    # another complete HTML document to the file - confirm this is intended.
    flog = open(user + '.html', 'a')
    try:
        flog.write("<html>\n\t<head>\n\t\t<title>InFB - " + user + "</title>\n\t\t<link href=\"infb.css\" rel=\"stylesheet\" type=\"text/css\" />\n\t</head>\n\t<body>\n\t\t<div class=\"rby\">\n\t\t\t<table class=\"flist\">\n\t\t\t\t")
        page = 0
        while True:
            res = browser.open('http://m.facebook.com/friends.php?a&f=' + str(page))
            parp = res.read()
            res.close()
            for i in friend_id_re.findall(parp):
                prof = 'http://m.facebook.com/profile.php?id=' + i + '&v=info'
                res = browser.open(prof)
                cont = res.read()
                res.close()
                # The report links to the www host instead of the mobile one.
                prof = prof.replace('m.', 'www.')
                ms = name_re.search(cont)
                if not ms:
                    continue
                name = ms.group(1)
                # The phone number is optional; the e-mail address is not.
                ms = tel_re.search(cont)
                tel = ms.group(1) if ms else ''
                ms = email_re.search(cont)
                if not ms:
                    continue
                # Several addresses are separated by <br />; "@" arrives as an entity.
                email = re.sub(r'<br \/>', ', ', ms.group(1)).replace('&#64;', '@')
                print(name + ' : ' + email + ' ' + tel)
                flog.write("<tr class=\"lbreak\">\n\t\t\t\t\t<td class=\"num\">" + i + "</td><td class=\"fname\"><a href=\"" + prof + "\" title=\"" + name + "\">" + name + "</a></td><td class=\"fmail\">" + email + "</td><td class=\"cnum\">" + tel + "</td>\n\t\t\t\t\t</tr>\n\t\t\t\t")
            if 'Next' not in parp:
                break
            page += 10
        flog.write("\n\t\t\t</table>\n\t\t</div>\n\t</body>\n</html>")
    finally:
        # Always release the file handle, even if a request fails mid-run.
        flog.close()

 def usage():
    """Print the command-line synopsis to stdout and exit with status 1."""
    script_name = sys.argv[0]
    message = ''.join(['Usage: ', script_name, ' user@domain.tld [password]'])
    print(message)
    sys.exit(1)

 # Run the scraper only when this file is executed as a script, not on import.
 if __name__ == '__main__':
    main()
	/*
	Ruel Pagayon (c) 2010 - ruel@ruel.me

	Cascading Style Sheet for InFB Log Output.
	*/
	/* Page defaults: dark background, light text, very small base type. */
	body {
		background-color: #3C3C3C;
		color: #FFF;
		margin-top: 50px;
		margin-left: 25px;
		font-size: xx-small;
		/* "sans" is not a CSS generic family; "sans-serif" is the valid fallback keyword. */
		font-family: Calibri, Arial, sans-serif;
	}

	/* Wrapper <div class="rby"> around the friends table. */
	.rby {
		text-align: center;
		font-size: xx-small;
	}

	table {
		text-align: center;
	}

	/* Table cells: left-aligned text with padding on every side. */
	td {
		padding-top: 0.5em;
		padding-bottom: 0.5em;
		padding-left: 1em;
		padding-right: 1em;
		text-align: left;
		font-size: small;
	}

	/* Friend id column. */
	td.num {
		color: #CCC;
	}

	/* Phone number column. */
	td.cnum {
		color: #AFAFAF;
	}

	/* Links are bold white and only underlined while hovered. */
	a:active, a:visited, a:link {
		color: #FFF;
		font-weight: bold;
		text-decoration: none;
	}

	a:hover {
		color: #FFF;
		font-weight: bold;
		text-decoration: underline;
	}
	#!/usr/bin/python
	#
	# InFB - Information Facebook
	# Usage: infb.py user@domain.tld [password]
	# http://ruel.me
	#
	# Copyright (c) 2010, Ruel Pagayon - ruel@ruel.me
	# All rights reserved.
	#
	# Redistribution and use in source and binary forms, with or without
	# * Redistributions of source code must retain the above copyright
	# notice, this list of conditions and the following disclaimer.
	# * Redistributions in binary form must reproduce the above copyright
	# notice, this list of conditions and the following disclaimer in the
	# documentation and/or other materials provided with the distribution.
	# * Neither the name of ruel.me nor the names of its contributors
	# may be used to endorse or promote products derived from this
	# script without specific prior written permission.
	#
	# THIS SCRIPT IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
	# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
	# DISCLAIMED. IN NO EVENT SHALL RUEL PAGAYON BE LIABLE FOR ANY
	# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
	# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
	# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
	# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	# SCRIPT, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


	import sys, re, urllib, urllib2, cookielib, HTMLParser, getpass

	class FormScraper(HTMLParser.HTMLParser):
	    """
	    Scrapes the Facebook login page for form values that need to be submitted on login.
	    Necessary because the form values change each time the login page is loaded.

	    Only ``<input type="hidden">`` elements that appear inside the form whose
	    ``id`` is ``login_form`` are collected. Forms and inputs that lack the
	    attributes being inspected are ignored instead of raising ``KeyError``.

	    Usage:
	    form_scraper = FormScraper()
	    form_scraper.feed(html_from_facebook)
	    form_values = form_scraper.values

	    Attributes:
	    in_form -- True while the parser is between the login form's start and end tags.
	    values  -- list of (name, value) tuples, in document order.
	    """

	    def __init__(self, *args, **kwargs):
	        HTMLParser.HTMLParser.__init__(self, *args, **kwargs)
	        self.in_form = False
	        self.values = []

	    def handle_starttag(self, tag, attrs):
	        """Track entry into the login form and record its hidden inputs."""
	        tag = tag.lower()
	        attrs = dict(attrs)

	        # .get() is used throughout: other forms on the page need not carry an
	        # id, and inputs need not carry type/name/value attributes.
	        if tag == 'form' and attrs.get('id') == 'login_form':
	            self.in_form = True
	        elif self.in_form and tag == 'input':
	            # The parser reports a valueless attribute as None, hence "or ''".
	            if (attrs.get('type') or '').lower() != 'hidden':
	                return
	            name = attrs.get('name')
	            if name is None:
	                # An input without a name is never submitted with the form.
	                return
	            self.values.append((name, attrs.get('value') or ''))

	    def handle_endtag(self, tag):
	        """Leave collection mode when the login form is closed."""
	        if tag.lower() == 'form' and self.in_form:
	            self.in_form = False

	def main():
	    """
	    Log in to Facebook and write friends' contact details to an HTML report.

	    The account e-mail is read from ``sys.argv[1]`` and the password from
	    ``sys.argv[2]``; when the password is omitted it is requested with
	    ``getpass``. After logging in, the mobile friends list is walked by
	    advancing the ``f`` offset in steps of 10 while the listing contains
	    "Next". For every friend whose info page shows a name and an e-mail
	    address, a summary line is printed and a table row is written to
	    ``<user>.html`` in the current directory.

	    Exits with status 1 (through ``usage``) when no e-mail argument is given
	    and with status 2 when the login response URL does not end in ``home.php``.
	    """
	    if len(sys.argv) < 2:
	        usage()

	    user = sys.argv[1]

	    # Prompt for the password when it is not given as an argument.
	    if len(sys.argv) < 3:
	        passw = getpass.getpass("Enter password: ")
	    else:
	        passw = sys.argv[2]

	    # Build an opener that keeps cookies between requests and install it as
	    # the urllib2 default.
	    CHandler = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
	    browser = urllib2.build_opener(CHandler)
	    browser.addheaders = [('User-agent', 'InFB - ruel@ruel.me - http://ruel.me')]
	    urllib2.install_opener(browser)

	    # Retrieve login form data and initialize the cookies
	    print('Initializing..')
	    res = browser.open('https://www.facebook.com/login.php')

	    # Determine string encoding from the Content-Type header (default UTF-8)
	    content_type = res.info()['Content-Type'].split('; ')
	    encoding = 'utf-8'
	    if len(content_type) > 1 and content_type[1].startswith('charset'):
	        encoding = content_type[1].split('=')[1]
	    html = unicode(res.read(), encoding=encoding)
	    res.close()

	    # Scrape form for hidden inputs, add email and password to values
	    form_scraper = FormScraper()
	    form_scraper.feed(html)
	    form_data = form_scraper.values
	    form_data.extend([('email', user), ('pass', passw)])

	    def to_bytes(text):
	        # urlencode() expects byte strings, so the scraped unicode values are
	        # encoded. Values that are already byte strings (the e-mail and
	        # password) are passed through untouched: calling .encode() on a
	        # non-ASCII byte string raises UnicodeDecodeError.
	        if isinstance(text, unicode):
	            return text.encode(encoding)
	        return text

	    form_data = [(to_bytes(key), to_bytes(value)) for key, value in form_data]
	    data = urllib.urlencode(form_data)

	    # Login
	    print('Logging in to account ' + user)
	    res = browser.open('https://login.facebook.com/login.php?login_attempt=1', data)
	    print(res.code)
	    landed_url = res.url
	    print(landed_url)
	    # Close the response before a possible exit so it is released either way.
	    res.close()
	    if not re.search(r'home\.php$', landed_url):
	        print('Login Failed')
	        sys.exit(2)

	    # Patterns used while scraping, compiled once outside the loops.
	    friend_id_re = re.compile(r'"\/friends\.php\?id=([0-9]+)&')
	    name_re = re.compile(r'<div id="body"><div><div>(.*?)<\/div>')
	    tel_re = re.compile(r'href="tel:(.*?)"')
	    email_re = re.compile(r'Emails?:<\/div><\/td><td valign="top"><div>(.*?)<\/div>')

	    # Get Emails and Phone Numbers
	    print("Getting Info..\n")
	    # NOTE(review): append mode means a second run for the same user adds
	    # another complete HTML document to the file - confirm this is intended.
	    flog = open(user + '.html', 'a')
	    try:
	        flog.write("<html>\n\t<head>\n\t\t<title>InFB - " + user + "</title>\n\t\t<link href=\"infb.css\" rel=\"stylesheet\" type=\"text/css\" />\n\t</head>\n\t<body>\n\t\t<div class=\"rby\">\n\t\t\t<table class=\"flist\">\n\t\t\t\t")
	        page = 0
	        while True:
	            res = browser.open('http://m.facebook.com/friends.php?a&f=' + str(page))
	            parp = res.read()
	            res.close()
	            for i in friend_id_re.findall(parp):
	                prof = 'http://m.facebook.com/profile.php?id=' + i + '&v=info'
	                res = browser.open(prof)
	                cont = res.read()
	                res.close()
	                # The report links to the www host instead of the mobile one.
	                prof = prof.replace('m.', 'www.')
	                ms = name_re.search(cont)
	                if not ms:
	                    continue
	                name = ms.group(1)
	                # The phone number is optional; the e-mail address is not.
	                ms = tel_re.search(cont)
	                tel = ms.group(1) if ms else ''
	                ms = email_re.search(cont)
	                if not ms:
	                    continue
	                # Several addresses are separated by <br />; "@" arrives as an entity.
	                email = re.sub(r'<br \/>', ', ', ms.group(1)).replace('&#64;', '@')
	                print(name + ' : ' + email + ' ' + tel)
	                flog.write("<tr class=\"lbreak\">\n\t\t\t\t\t<td class=\"num\">" + i + "</td><td class=\"fname\"><a href=\"" + prof + "\" title=\"" + name + "\">" + name + "</a></td><td class=\"fmail\">" + email + "</td><td class=\"cnum\">" + tel + "</td>\n\t\t\t\t\t</tr>\n\t\t\t\t")
	            if 'Next' not in parp:
	                break
	            page += 10
	        flog.write("\n\t\t\t</table>\n\t\t</div>\n\t</body>\n</html>")
	    finally:
	        # Always release the file handle, even if a request fails mid-run.
	        flog.close()

	def usage():
	    """Print the command-line synopsis to stdout and exit with status 1."""
	    print('Usage: ' + sys.argv[0] + ' user@domain.tld [password]')
	    sys.exit(1)

	# Run the scraper only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()