Last active
April 18, 2024 10:23
-
-
Save bcarroll/0c5c9bae18c8b6dc7b7a3eea2748a713 to your computer and use it in GitHub Desktop.
Selenium Python - Get HTTP Status Code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Implementation of the Selenium Chrome WebDriver
with HTTP Response data included via the ChromeDriver performance logging capability
https://sites.google.com/a/chromium.org/chromedriver/logging/performance-log

The ChromeWebDriver response attribute(s) contain a dict with information about the response
{
    "connectionId": [Integer],
    "connectionReused": [Boolean],
    "encodedDataLength": [Integer],
    "fromDiskCache": [Boolean],
    "fromServiceWorker": [Boolean],
    "headers": [dict], # HTTP Headers as a dict
    "headersText": [String], # HTTP Headers as text
    "mimeType": [String],
    "protocol": [String],
    "remoteIPAddress": [String],
    "remotePort": [Integer],
    "requestHeaders": [dict],
    "requestHeadersText": [String],
    "securityDetails": [dict], # TLS/SSL related information
    "securityState": [String],
    "status": [Integer], # HTTP Status Code of the Response
    "statusText": [String],
    "timing": [dict],
    "url": [String]
}

Example:
    from ChromeWebDriver import ChromeWebDriver
    browser = ChromeWebDriver('https://github.com', headless=False)
    print(browser.response["status"]) # prints the HTTP status code of the response (for the url in the class initialization)
    for response in browser.responses: # the responses attribute contains a list of dicts containing all responses received (css, js, etc.)
        print(response) # prints a dict containing data for each response
"""
import json
from urllib.parse import urlsplit

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities


class ChromeWebDriver():
    """Load a URL in Chrome and capture the HTTP responses it produced.

    The responses are read from ChromeDriver's ``performance`` log, which
    carries the DevTools ``Network.responseReceived`` events.

    Attributes:
        url: The URL passed to the constructor.
        options: The ``webdriver.ChromeOptions`` used to start Chrome.
        desired_capabilities: Private copy of the Chrome capabilities with
            performance logging enabled.
        driver: The underlying ``webdriver.Chrome`` instance.
        responses: List of response dicts, one per
            ``Network.responseReceived`` event, in log order.
        response: The response dict whose URL matches ``url``, or ``None``
            when no logged response matches.
    """

    def __init__(self, url, headless=False, autoclose=True):
        """Start Chrome, fetch ``url`` and collect the logged responses.

        Args:
            url: URL to fetch.
            headless: Whether to run Chrome without a visible window.
            autoclose: When true (the default) the browser session is ended
                as soon as the responses have been collected. When false
                the session stays open and the caller is responsible for
                calling ``self.driver.quit()``.
        """
        self.url = url  # URL to fetch
        self.response = None  # stays None when no response matches ``url``
        self.responses = []  # list to store each response
        self.options = webdriver.ChromeOptions()
        self.options.headless = headless
        # DesiredCapabilities.CHROME is a dict shared by every user of the
        # class; copy it so enabling logging here does not leak elsewhere.
        self.desired_capabilities = DesiredCapabilities.CHROME.copy()
        # Legacy key plus the vendor-prefixed key that W3C-mode ChromeDriver
        # reads; setting both keeps old and new driver versions working.
        self.desired_capabilities['loggingPrefs'] = {'performance': 'ALL'}
        self.desired_capabilities['goog:loggingPrefs'] = {'performance': 'ALL'}
        self.driver = webdriver.Chrome(options=self.options, desired_capabilities=self.desired_capabilities)
        completed = False
        try:
            self.driver.get(url)  # get the requested URL
            perf_log = self.driver.get_log('performance')
            self.responses, self.response = self._parse_performance_log(perf_log, url)
            completed = True
        finally:
            # On failure the caller never receives this object, so the
            # session must be ended here regardless of ``autoclose``.
            if autoclose or not completed:
                self.driver.quit()

    @staticmethod
    def _normalize_url(url):
        """Return a comparable key for ``url``.

        Scheme and host are lower-cased, an empty path is treated as ``/``
        and the fragment is dropped, so ``https://github.com`` compares
        equal to ``https://github.com/``.
        """
        parts = urlsplit(url)
        return (parts.scheme.lower(), parts.netloc.lower(), parts.path or "/", parts.query)

    @staticmethod
    def _parse_performance_log(perf_log, url):
        """Extract the HTTP responses from a ChromeDriver performance log.

        Args:
            perf_log: Iterable of log entries, each a dict whose ``message``
                value is a JSON string wrapping a DevTools event.
            url: URL whose own response should be singled out.

        Returns:
            A ``(responses, main_response)`` tuple. ``responses`` lists the
            ``response`` dict of every ``Network.responseReceived`` event in
            log order; ``main_response`` is the last of those whose URL
            matches ``url``, or ``None`` if none does.
        """
        target = ChromeWebDriver._normalize_url(url)
        responses = []
        main_response = None
        for entry in perf_log:
            message = json.loads(entry["message"])["message"]
            if message.get("method") != "Network.responseReceived":
                continue  # only HTTP responses are of interest
            response = message["params"]["response"]
            responses.append(response)
            if ChromeWebDriver._normalize_url(response.get("url", "")) == target:
                main_response = response
        return responses, main_response
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example of working with the most recent versions of Selenium: Chromedriver_Get_Response.py