Facebook post likers fetcher, written in Python
#! /usr/bin/env python3
# Author: Safwan Ljd
# Email: eljadisafwan at gmail dot com
# Version: 1.1
# Date: 2021-12-10
# License: GPL-3.0-or-later
# Forked From: fb_likers_fetcher.py (https://gitlab.com/-/snippets/2478576)
#
#
# This is a fork of `fb_likers_fetcher.py` that includes phone numbers
# from the Facebook leak in the CSV file.
#
# This is a CLI tool that pulls the likers of any given Facebook post
# and saves their information in a CSV file.
#
# Run `python fb_likers_fetcher_with_phone.py --help` for help.
#
# Facebook requires you to be logged in in order to view the likers
# of posts, so you will need to specify your username and password
# using the DEFAULT_USERNAME and DEFAULT_PASSWORD variables below,
# or using the --username and --password flags. Alternatively, you
# can specify your Facebook cookie in the DEFAULT_COOKIE variable
# below, or using the --cookie flag.
#
# Note that if you're on Windows and have the `pycryptodomex` and
# `pywin32` libraries installed, the script will try to fetch
# your cookie from Google Chrome automatically.
#
# For the phone-number fetching, you need an SQLite database file
# named `facebook.db` with a table named `users` that has a column
# `id`, the ID of the Facebook account the number belongs to, and
# a column `phone`, the actual phone number.
#
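# For reference, a minimal schema satisfying those expectations could look
# like this (an assumption for illustration, not any particular dump format):
#
#     CREATE TABLE users (id INTEGER PRIMARY KEY, phone INTEGER);
#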
# If you don't have the Facebook leak data, just use the original
# `fb_likers_fetcher.py`, which is much faster because it skips the
# database-lookup overhead.
#
#
# Libraries you need to install:
#     bs4
#     lxml
#     requests
#     click
#     pycryptodomex (if you're on Windows)
#     pywin32 (if you're on Windows)
#
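# A typical install command would be (assumed, adjust to your environment):
#
#     pip install bs4 lxml requests click
#     pip install pycryptodomex pywin32    # Windows only
#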
from bs4 import BeautifulSoup
import requests
import sqlite3
import click
import math
import sys
import re
import os


DEFAULT_USERNAME = ""
DEFAULT_PASSWORD = ""
DEFAULT_COOKIE = ""  # A String in This Format -> "xs=xxxxxxx; c_user=xxxxxxx"
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0"
CHUNK_SIZE = 50  # Don't Change This Unless You Know What You're Doing
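# CHUNK_SIZE appears to match how many likers Facebook's mobile reaction
# page returns per request; the pagination math in fetch_likers() assumes it.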


class FacebookCrawler:
    def __init__(self, username: str = "", password: str = "", cookie: str = "", user_agent: str = DEFAULT_USER_AGENT):
        self.username = username
        self.password = password
        self.cookie = cookie
        self.user_agent = user_agent
        self.session = requests.Session()

        headers = {
            "User-Agent": self.user_agent,
            "Cookie": self.cookie,
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate"
        }
        self.session.headers.update(headers)

    def login(self) -> None:
        if not self.cookie:
            login_url = "https://m.facebook.com/login.php"
            response = self.session.get(login_url)
            soup = BeautifulSoup(response.text, features="lxml")

            # Hidden form tokens Facebook expects alongside the credentials
            data = {
                "lsd": soup.find(attrs={"name": "lsd"}).get("value"),
                "m_ts": soup.find(attrs={"name": "m_ts"}).get("value"),
                "li": soup.find(attrs={"name": "li"}).get("value"),
                "email": self.username,
                "pass": self.password,
                "login": "Log In",
                "version": 1
            }
            self.session.post(login_url, data=data)
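
    # Illustrative use (hypothetical cookie values):
    #
    #     crawler = FacebookCrawler(cookie="xs=abc123; c_user=100001234567890")
    #     crawler.login()  # no-op here, the Cookie header set in __init__ already authenticates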

    def get_likers_info_list(self, likers_page_soup: BeautifulSoup, ids_list: list = None, likers_num: int = 0) -> list:
        likers = likers_page_soup.find_all(attrs={"class": "_4mn c"})
        likers_num = likers_num or len(likers)

        likers_info_list = []
        for i in range(likers_num):
            a_tag = likers[i].find("a")
            account_endpoint = a_tag.get("href")

            user_id = ids_list[i] if ids_list else self.__get_id_from_endpoint(endpoint=account_endpoint)
            username = self.__get_username_from_endpoint(endpoint=account_endpoint)
            phone = get_phone_num_from_id(user_id)

            name = a_tag.text
            name_split = name.split(" ")
            first_name = name_split[0]
            last_name = name_split[-1]

            user_info = {
                "user_id": user_id,
                "username": username,
                "first_name": first_name,
                "last_name": last_name,
                "phone": phone
            }
            likers_info_list.append(user_info)

        return likers_info_list
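
    # Each element of the returned list is shaped like this (illustrative
    # values, not real data):
    #
    #     {"user_id": "100001234567890", "username": "some.user",
    #      "first_name": "Some", "last_name": "User", "phone": "0218911234567"}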

    def get_total_likes(self, likers_page_soup: BeautifulSoup) -> int:
        matches = re.findall(r"total_count=[0-9]*", str(likers_page_soup))
        try:
            return int(re.sub(r"[^0-9]", "", matches[0]))
        except Exception:
            sys.exit("[*] Unable to Obtain Post Likes Count!\n[*] Check Your Credentials and Make Sure The Link is Valid...")

    def get_likers_page_soup(self, likers_page_url: str) -> BeautifulSoup:
        response = self.session.get(likers_page_url)
        return BeautifulSoup(self.__clean_html(response.text), features="lxml")

    def get_likers_page_url(self, post_id: str) -> str:
        return "https://m.facebook.com/ufi/reaction/profile/browser/?ft_ent_identifier=" + post_id

    def get_next_likers_page_url(self, prev_likers_page_soup: BeautifulSoup) -> str:
        endpoint = prev_likers_page_soup.find(attrs={"class": "touchable primary"}).get("href")
        return "https://m.facebook.com" + endpoint

    def get_ids_list_from_next_likers_page_url(self, next_likers_page_url: str) -> list:
        return next_likers_page_url.split("shown_ids=")[-1].split("&")[0].split("%2C")[:CHUNK_SIZE]
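
    # The "next page" link embeds the IDs already shown as a URL-encoded,
    # comma-separated parameter, roughly of this shape (illustrative):
    #
    #     .../browser/?ft_ent_identifier=<post_id>&shown_ids=1000012%2C1000034%2C...&total_count=250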

    def get_post_id(self, post_url: str) -> str:
        matches = re.findall(r"posts?[/:]\s?[0-9]*", post_url)
        if not matches:
            sys.exit("[*] Invalid Post URL")
        return re.sub(r"[^0-9]", "", matches[0])

    def __get_username_from_endpoint(self, endpoint: str) -> str:
        username = ""
        try:
            # Endpoints like "/some.user?..." carry a username; "/profile.php?id=..." endpoints don't
            if not re.findall(r"\?id=[0-9]*", endpoint):
                username = endpoint[1:].split("?")[0]
        finally:
            return username

    def __get_id_from_endpoint(self, endpoint: str) -> str:
        user_id = ""
        try:
            id_section = re.findall(r"\?id=[0-9]*", endpoint)
            if id_section:
                user_id = id_section[0].replace("?id=", "")
            else:
                response = self.session.get("https://m.facebook.com" + endpoint)
                matches = re.findall(r"entity_id:[0-9]*", response.text)
                matches = matches or re.findall(r"currentProfileID:[0-9]*", response.text)
                matches = matches or re.findall(r"\"entity_id\":\"[0-9]*\"", response.text)
                matches = matches or re.findall(r"\"userID\":\"[0-9]*\"", response.text)
                for match in matches:
                    user_id = re.sub(r"[^0-9]", "", match)
                    if user_id:
                        break
        finally:
            return user_id

    def __clean_html(self, html_text: str) -> str:
        # Un-escape HTML that arrives JSON-escaped (e.g. \" and \/) in paginated responses
        html_text = re.sub(r"\\[^u\"/]", "", html_text)
        html_text = html_text.replace("\\\"", "\"")
        html_text = html_text.replace("\\/", "/")
        if "<" not in html_text:
            html_text = html_text.encode().decode("unicode_escape")
        return html_text


def create_csv_file(file_name: str = "", file_num: int = 1, prefix: str = "users-", ext: str = ".csv") -> str:
    # Find the first users-N.csv name that doesn't exist yet
    while not file_name:
        current_file_name = prefix + str(file_num) + ext
        if not os.path.exists(current_file_name):
            file_name = current_file_name
        else:
            file_num += 1

    file_name = (file_name + ext) if (file_name[-len(ext):].lower()) != ext else file_name

    # Write a UTF-8 BOM so spreadsheet apps detect the encoding, then the header row
    with open(file_name, "wb") as file:
        file.write(b"\xEF\xBB\xBF")
    with open(file_name, "a", encoding="UTF-8") as file:
        file.write("id,username,fn,ln,phone,email,url\n")

    return file_name


def append_to_csv_file(likers_info_list: list, file_name: str) -> None:
    with open(file_name, "a", encoding="UTF-8") as file:
        for user_info in likers_info_list:
            user_id = user_info["user_id"]
            username = user_info["username"]
            first_name = user_info["first_name"]
            last_name = user_info["last_name"]
            phone = user_info["phone"]
            email = ""
            url = ""

            identifier = username or user_id
            if identifier:
                email = identifier + "@facebook.com"
                url = "https://www.facebook.com/" + identifier

            file.write(",".join([user_id, username, first_name, last_name, phone, email, url]) + "\n")


def get_phone_num_from_id(user_id: str) -> str:
    phone_num = ""
    try:
        # fb_db_cur is the module-level cursor opened over facebook.db in __main__;
        # any failure (missing table, no matching row, empty user_id) falls through
        # to the finally-return and yields an empty string
        fb_db_cur.execute("SELECT (phone) FROM users WHERE id = ?", (int(user_id),))
        phone_num = fb_db_cur.fetchone()
        phone_num = "0" + str(phone_num[0])
    finally:
        return phone_num
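
# For example (hypothetical data): given a row (id=100001234567890,
# phone=218911234567) in facebook.db, get_phone_num_from_id("100001234567890")
# would return "0218911234567".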


def get_chrome_fb_cookie() -> str:
    fb_cookie_str = ""
    try:
        from Cryptodome.Cipher.AES import new, MODE_GCM
        from win32.win32crypt import CryptUnprotectData
        from base64 import b64decode
        import json

        local_appdata = os.path.expandvars("%LOCALAPPDATA%")
        cookies_file = local_appdata + "/Google/Chrome/User Data/Default/Cookies"
        local_state_file = local_appdata + "/Google/Chrome/User Data/Local State"

        # The AES key sits DPAPI-encrypted in Local State, prefixed with "DPAPI" (stripped by [5:])
        with open(local_state_file) as local_state:
            key = CryptUnprotectData(b64decode(json.load(local_state)["os_crypt"]["encrypted_key"])[5:])[1]

        with sqlite3.connect(cookies_file) as conn:
            conn.create_function("decrypt", 1, lambda v: new(key, MODE_GCM, v[3:15]).decrypt(v[15:-16]).decode())
            fb_cookies_dict = dict(conn.execute("SELECT name, decrypt(encrypted_value) FROM cookies WHERE host_key = '.facebook.com' AND name IN ('xs', 'c_user')"))

        fb_cookie_str = "xs=" + fb_cookies_dict["xs"] + "; c_user=" + fb_cookies_dict["c_user"]
        print("Fetched Facebook Cookie From Google Chrome!")
        print("Cookie: \"" + fb_cookie_str + "\"\n")
    finally:
        return fb_cookie_str
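
# Chrome stores each encrypted cookie value as b"v10" + 12-byte nonce +
# ciphertext + 16-byte GCM tag, which is what the v[3:15] / v[15:-16]
# slicing in the decrypt() function above unpacks.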


def fetch_likers(crawler: FacebookCrawler, url: str, number: int, file_name: str) -> None:
    post_id = crawler.get_post_id(post_url=url)
    likers_page_url = crawler.get_likers_page_url(post_id=post_id)
    soup = crawler.get_likers_page_soup(likers_page_url=likers_page_url)
    total_likes = crawler.get_total_likes(likers_page_soup=soup)

    number = number if (number and number < total_likes) else total_likes
    chunks = math.ceil(number / CHUNK_SIZE)
    file_name = create_csv_file(file_name=file_name)

    header = "==================== Post ID: " + post_id + " ===================="
    footer = "=" * len(header)
    print("\n" + header)
    print("Output File:\t" + file_name)
    print("Total likes:\t" + str(total_likes))
    print("Fetching:\t" + str(number))
    print("Chunks:\t\t" + str(chunks))
    print("\nFetching...")

    try:
        for chunk_num in range(chunks):
            print("[" + "{:.2f}".format(chunk_num / chunks * 100) + "% Done]\tFetching chunk " + str(chunk_num + 1) + " out of " + str(chunks), end="...\r")
            soup = crawler.get_likers_page_soup(likers_page_url=next_likers_page_url) if (chunk_num > 0) else soup

            try:
                next_likers_page_url = crawler.get_next_likers_page_url(prev_likers_page_soup=soup)
                ids_list = crawler.get_ids_list_from_next_likers_page_url(next_likers_page_url=next_likers_page_url)
            except Exception:
                # The last page has no "next" link
                ids_list = []

            append_to_csv_file(likers_info_list=crawler.get_likers_info_list(likers_page_soup=soup, ids_list=ids_list, likers_num=(len(ids_list) or (number % CHUNK_SIZE))), file_name=file_name)

        print("[100.00% Done]\t" + str(chunks) + " out of " + str(chunks) + " chunks were fetched!")
        print(footer + "\n")
    except Exception as exception:
        print("\n[*] Something Went Wrong!!\n[*] Your Account MIGHT Have Been Temporarily Blacklisted By Facebook\n[*] Technical Details: " + str(exception), file=sys.stderr)
@click.option("-o", "--output", help="The name of the CSV file, it defaults to \"users-1.csv\"") | |
@click.option("--password", help="The password of your Facebook account") | |
@click.option("--username", help="The username of your Facebook account") | |
@click.option("--cookie", help="The cookie of your Facebook account") | |
@click.option("-n", "--number", help="Maximum number of likers to fetch") | |
@click.option("-f", "--file", help="A file containing posts you want to fetch the likers of") | |
@click.option("-u", "--url", help="The URL of the post you want to fetch the likers of") | |
@click.command() | |
def fetcher(url: str, file: str, number: int, cookie: str, username: str, password: str, output: str) -> None:
    """A Tool That Fetches The Likers of Any Given Facebook Post and Saves Them in a CSV File"""
    if not url and not file:
        sys.exit("[*] You Must Specify Either a URL [--url/-u] or a file [--file/-f]")

    posts_urls = []
    if file:
        if output:
            print("[*] Ignoring the Output File Specified Because [--file/-f] Was Used\n", file=sys.stderr)
        output = ""
        with open(file, "r") as posts_file:
            posts_urls = posts_file.readlines()
        print("Total Posts: " + str(len(posts_urls)))
    if url:
        posts_urls.insert(0, url)

    cookie = cookie or DEFAULT_COOKIE or get_chrome_fb_cookie()
    username = username or DEFAULT_USERNAME
    password = password or DEFAULT_PASSWORD
    number = number and int(number)

    crawler = FacebookCrawler(username=username, password=password, cookie=cookie)
    crawler.login()

    for post_url in posts_urls:
        post_url = post_url.strip()
        # Lines starting with "#" or ";" in the posts file are treated as comments
        if post_url and post_url[0] not in ["#", ";"]:
            fetch_likers(crawler=crawler, url=post_url, number=number, file_name=output)


if __name__ == "__main__":
    try:
        # Note: sqlite3.connect() creates facebook.db as an empty database if it
        # doesn't exist; phone lookups then simply return empty fields
        with sqlite3.connect("./facebook.db") as fb_db_conn:
            fb_db_cur = fb_db_conn.cursor()
            fetcher(prog_name="fetcher")
    except Exception as exception:
        print("[*] Something Went Wrong!!\n[*] Technical Details: " + str(exception), file=sys.stderr)
    except KeyboardInterrupt:
        print("\nAborted!")
This was moved over to GitLab: https://gitlab.com/-/snippets/2478577