@apeckham
Created July 18, 2024 06:44
Extract the contents of a HAR file to disk; if a response body is not stored in the HAR, fetch it from the original URL.
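To use it, save the script under a name of your choice (extract_har.py in this example) and pipe a HAR export into stdin: python extract_har.py < session.har. Files are written under the current directory, one folder per host.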
import base64
import json
import os
import sys
from urllib.parse import urlparse

import requests


def fetch_url(url):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        return response.content
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return None


def extract_files_from_har(har_data):
    for entry in har_data['log']['entries']:
        url = entry['request']['url']
        parsed_url = urlparse(url)

        # Mirror the host/path structure under the current directory.
        # Note: query strings are ignored, so URLs differing only in their
        # query map to the same file (the first one extracted wins).
        host_path = parsed_url.netloc
        path = parsed_url.path.lstrip('/')
        # Directory-style URLs ("/" or a trailing slash) would yield an empty
        # or directory-named file; fall back to index.html for those
        if not path or path.endswith('/'):
            path = os.path.join(path, 'index.html')
        file_path = os.path.join(host_path, path)

        # Ensure the directory exists
        os.makedirs(os.path.dirname(file_path), exist_ok=True)

        # Check if the file already exists
        if os.path.exists(file_path):
            print(f"Skipping (file exists): {file_path}")
            continue

        # Get the file content from the HAR entry
        content = entry['response']['content'].get('text')
        if content:
            # Binary bodies are stored base64-encoded in the HAR
            if entry['response']['content'].get('encoding') == 'base64':
                file_content = base64.b64decode(content)
            else:
                file_content = content.encode('utf-8')
        else:
            # Body was not captured in the HAR; fetch it from the live URL
            print(f"Fetching content for: {url}")
            file_content = fetch_url(url)
            if file_content is None:
                print(f"Skipping (fetch failed): {url}")
                continue

        # Write the file
        try:
            with open(file_path, 'wb') as file:
                file.write(file_content)
            print(f"Extracted: {file_path}")
        except IOError as e:
            print(f"Error writing file {file_path}: {e}")


if __name__ == "__main__":
    # Read HAR data from stdin
    har_data = json.load(sys.stdin)
    extract_files_from_har(har_data)
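
For reference, here is a minimal sketch of the HAR shape the script consumes. The example.com URLs and bodies below are hypothetical, chosen to exercise both branches of the content handling (inline text and base64-encoded):

# Hypothetical sample input: one body stored inline as text, one stored
# base64-encoded, matching the two branches handled above
sample_har = {
    "log": {
        "entries": [
            {
                "request": {"url": "https://example.com/hello.txt"},
                "response": {"content": {"text": "hello world"}},
            },
            {
                "request": {"url": "https://example.com/data.bin"},
                "response": {"content": {"text": "YmluYXJ5IGRhdGE=",  # base64 of b'binary data'
                                         "encoding": "base64"}},
            },
        ]
    }
}

# Writes example.com/hello.txt and example.com/data.bin
extract_files_from_har(sample_har)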