Last active
November 14, 2024 22:27
-
-
Save anthonyeden/38b3c4537ad37f2a3bc48aebca0b6a06 to your computer and use it in GitHub Desktop.
Facebook Live Embed Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This script scrapes a Facebook page for live videos and generates an iframe embed code for your website. It uses the Selenium Firefox web driver. You may get banned if you run it too often - this is yet to be seen.
The file `livestream-data.json` will be updated with extra fields if/when a live video is found.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"last_updated": 1584600828.746, | |
"pages": [ | |
{ | |
"facebook_url": "https://www.facebook.com/newslivetvofficial/", | |
"name": "Test FB Page", | |
"website": "https://example.com/" | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import time | |
import random | |
import json | |
import urllib | |
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.common.keys import Keys | |
from selenium.webdriver.support.ui import WebDriverWait | |
# Facebook Live embed scraper (script body).
#
# Reads the page list from livestream-data.json, opens each page's Facebook
# URL in a Selenium-driven Firefox window, and looks for a post link whose
# href contains "video" and whose surrounding markup contains the text
# "is live now". For each match the page entry gains `facebook_live_url`,
# `facebook_live_last_updated_utc` and `facebook_live_embed`; if anything
# matched, the JSON file is rewritten with a fresh `last_updated` timestamp.

# quote_plus lives in urllib.parse on Python 3 and in urllib on Python 2;
# import it here so the script runs on either interpreter.
try:
    from urllib.parse import quote_plus
except ImportError:
    from urllib import quote_plus

DATA_FILE = 'livestream-data.json'

# The "is live now" label is looked for five levels above the video link.
LIVE_MARKER_ANCESTOR_XPATH = '../../../../..'

fb_embed_template = """<iframe src="https://www.facebook.com/plugins/video.php?href={URL_VIDEO}&show_text=1&width=560" width="560" height="382" style="border:none;overflow:hidden" scrolling="no" frameborder="0" allowTransparency="true" allow="encrypted-media" allowFullScreen="true"></iframe>"""


def _strip_query(url):
    """Return *url* without its query string (everything from the first '?')."""
    return url.split("?", 1)[0]


def _build_embed(video_url):
    """Return the iframe embed markup for *video_url* (URL-encoded into the template)."""
    return fb_embed_template.replace("{URL_VIDEO}", quote_plus(video_url))


# Load data file from disk (closed deterministically by the context manager)
with open(DATA_FILE) as f:
    data = json.load(f)

updated = False

for page in data['pages']:
    # One browser window per page; it is closed when the `with` block ends.
    with webdriver.Firefox() as driver:
        driver.get(page['facebook_url'])
        posts = driver.find_elements(By.CLASS_NAME, "userContentWrapper")

        # Loop over every post wrapper and every <a> tag inside it
        for post in posts:
            for link in post.find_elements(By.TAG_NAME, "a"):
                try:
                    # Anchors without an href return None; treat as "no match"
                    # instead of raising on the substring test.
                    href = link.get_attribute("href") or ""
                    if 'video' not in href:
                        continue

                    # Found a video link, but need to check it's live
                    container = link.find_element(By.XPATH, LIVE_MARKER_ANCESTOR_XPATH)
                    markup = container.get_attribute("innerHTML") or ""
                    if 'is live now' not in markup:
                        continue

                    # Found a live video
                    print("LIVE VIDEO %s %s" % (page['name'], href))
                    live_url = _strip_query(href)
                    page['facebook_live_url'] = live_url
                    page['facebook_live_last_updated_utc'] = time.time()
                    page['facebook_live_embed'] = _build_embed(live_url)
                    updated = True
                except Exception as err:
                    # Best-effort scrape: one bad element (stale reference,
                    # missing ancestor, ...) must not abort the whole run.
                    print("EXCEPTION %s" % (err,))

        # Scroll down the page a bit
        driver.execute_script("window.scrollTo(0, " + str(random.randint(1000, 5000)) + ")")

        # Keep the browser window open a short while before closing it
        time.sleep(random.randint(9, 25))

# Update file on disk, only if at least one live video was recorded
if updated:
    data['last_updated'] = time.time()
    with open(DATA_FILE, 'w') as f:
        f.write(json.dumps(data, sort_keys=True, indent=2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment