Last active
February 21, 2022 21:34
-
-
Save nikolaysm/2c0f0c16084284c8be8f71f83dba71b8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# This small example shows how to access JS-triggered requests via Selenium.
# This gives access to the raw response data for scraping,
# for example on many JS-intensive/React-based websites.
#
import json
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver import DesiredCapabilities
# Make Chrome record performance-log entries so the script can read them
# back after the page has loaded.
# Work on a copy: DesiredCapabilities.CHROME is a shared class-level dict and
# mutating it in place would leak this setting into any other user of it.
capabilities = DesiredCapabilities.CHROME.copy()
# As specified in the release notes for ChromeDriver 75.0.3770.8,
# capability loggingPrefs has been renamed to goog:loggingPrefs
# https://chromedriver.chromium.org/downloads#h.p_ID_520
# https://chromedriver.storage.googleapis.com/75.0.3770.8/notes.txt
capabilities["goog:loggingPrefs"] = {"performance": "ALL"}
# NOTE(review): desired_capabilities= and executable_path= are the Selenium 3
# style arguments; confirm the installed Selenium version still accepts them.
driver = webdriver.Chrome(
    desired_capabilities=capabilities, executable_path="./chromedriver"
)
# Load a page that issues XHR requests in the background.
driver.get("https://sitewithajaxorsomething.com")
sleep(5)  # give those requests time to complete

# Pull the recorded entries out of the performance log. Each entry stores a
# JSON string under "message"; the decoded object's own "message" key is what
# the rest of the script works with.
logs_raw = driver.get_log("performance")
logs = []
for entry in logs_raw:
    payload = json.loads(entry["message"])
    logs.append(payload["message"])
def log_filter(log_):
    """Return True if *log_* is a received response whose MIME type mentions JSON.

    *log_* is a decoded log message: a dict with a "method" key and, for
    "Network.responseReceived" events, a "params" -> "response" -> "mimeType"
    string. Entries with any other method are rejected without looking at
    their params.
    """
    # Anything that is not an actual response is rejected first.
    if log_["method"] != "Network.responseReceived":
        return False
    mime_type = log_["params"]["response"]["mimeType"]
    return "json" in mime_type
# Print the URL and body of every JSON response found in the parsed logs.
# The finally clause shuts the browser down even if something below raises,
# so no Chrome/chromedriver process is left behind.
try:
    for log in filter(log_filter, logs):
        request_id = log["params"]["requestId"]
        resp_url = log["params"]["response"]["url"]
        print(f"Caught {resp_url}")
        try:
            body = driver.execute_cdp_cmd(
                "Network.getResponseBody", {"requestId": request_id}
            )
        except WebDriverException as exc:
            # The browser cannot always return a body for a logged request;
            # skip that one instead of aborting the remaining responses.
            print(f"Could not fetch body for {resp_url}: {exc.msg}")
            continue
        print(body)
finally:
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment