apeckham · July 18, 2024 06:44
diff --git a/extract.py b/extract.py
 import json
 import base64
 import os
 import sys
 from urllib.parse import urlparse
 import requests

 def fetch_url(url):
    """Download ``url`` with a GET request and return the response body.

    The request uses a 10-second timeout. If the request fails or the
    server answers with an error status, the problem is printed and
    ``None`` is returned instead of the body.
    """
    try:
        reply = requests.get(url, timeout=10)
        # Convert bad HTTP responses into an HTTPError so they are
        # reported through the same handler as connection problems.
        reply.raise_for_status()
        body = reply.content
    except requests.RequestException as err:
        print(f"Error fetching {url}: {err}")
        body = None
    return body

 def extract_files_from_har(har_data):
    """Save every response of a parsed HAR document below the current directory.

    Each entry's request URL is mapped to ``<host>/<url path>``. A URL whose
    path is empty or ends in ``/`` is stored as ``index.html`` inside that
    directory. The query string is not part of the file name. Paths that
    already exist on disk are left untouched.

    The body comes from the entry's ``response.content.text`` (base64-decoded
    when ``encoding`` is ``"base64"``, UTF-8 encoded otherwise). When the HAR
    holds no body, the URL is downloaded with ``fetch_url``.

    Args:
        har_data: Parsed HAR JSON; ``har_data['log']['entries']`` is iterated.

    Returns:
        None. Progress and problems are reported with ``print``; a failing
        entry is skipped so the remaining entries are still processed.
    """
    for entry in har_data['log']['entries']:
        url = entry['request']['url']
        parsed_url = urlparse(url)

        # "https://host/" or "https://host/docs/" names a directory. Give it
        # a file name, otherwise the target resolves to the directory itself
        # and the page would never be written.
        relative_path = parsed_url.path.lstrip('/')
        if not relative_path or relative_path.endswith('/'):
            relative_path += 'index.html'

        # Collapse "." and ".." segments, then refuse any target that would
        # land outside the current directory: HAR contents are untrusted.
        file_path = os.path.normpath(
            os.path.join(parsed_url.netloc, relative_path)
        )
        if os.path.isabs(file_path) or file_path.split(os.sep, 1)[0] == os.pardir:
            print(f"Skipping (unsafe path): {url}")
            continue

        # Check if file already exists
        if os.path.exists(file_path):
            print(f"Skipping (file exists): {file_path}")
            continue

        # Get the file content
        content_info = entry['response']['content']
        content = content_info.get('text')

        if content:
            if content_info.get('encoding') == 'base64':
                try:
                    file_content = base64.b64decode(content)
                except ValueError as e:
                    # binascii.Error (bad padding etc.) is a ValueError.
                    print(f"Skipping (invalid base64): {url}: {e}")
                    continue
            else:
                file_content = content.encode('utf-8')
        else:
            print(f"Fetching content for: {url}")
            file_content = fetch_url(url)
            if file_content is None:
                print(f"Skipping (fetch failed): {url}")
                continue

        # Create the directory only once there is something to write, and
        # inside the try so that a file/directory name clash (e.g. "/a" saved
        # as a file, then "/a/b" requested) skips this entry instead of
        # aborting the whole run.
        try:
            directory = os.path.dirname(file_path)
            if directory:  # os.makedirs('') raises FileNotFoundError
                os.makedirs(directory, exist_ok=True)
            with open(file_path, 'wb') as out:
                out.write(file_content)
            print(f"Extracted: {file_path}")
        except OSError as e:
            print(f"Error writing file {file_path}: {e}")

 if __name__ == "__main__":
    # The HAR document (JSON) is expected on standard input; parse it and
    # hand the resulting object straight to the extractor.
    extract_files_from_har(json.load(sys.stdin))
	import json
	import base64
	import os
	import sys
	from urllib.parse import urlparse
	import requests

	def fetch_url(url):
	    """Download ``url`` with a GET request and return the response body.

	    The request uses a 10-second timeout. If the request fails or the
	    server answers with an error status, the problem is printed and
	    ``None`` is returned instead of the body.
	    """
	    try:
	        response = requests.get(url, timeout=10)
	        response.raise_for_status()  # Raises an HTTPError for bad responses
	        return response.content
	    except requests.RequestException as e:
	        print(f"Error fetching {url}: {e}")
	        return None

	def extract_files_from_har(har_data):
	    """Save every response of a parsed HAR document below the current directory.

	    Each entry's request URL is mapped to ``<host>/<url path>``. A URL whose
	    path is empty or ends in ``/`` is stored as ``index.html`` inside that
	    directory. The query string is not part of the file name. Paths that
	    already exist on disk are left untouched.

	    The body comes from the entry's ``response.content.text`` (base64-decoded
	    when ``encoding`` is ``"base64"``, UTF-8 encoded otherwise). When the HAR
	    holds no body, the URL is downloaded with ``fetch_url``.

	    Args:
	        har_data: Parsed HAR JSON; ``har_data['log']['entries']`` is iterated.

	    Returns:
	        None. Progress and problems are reported with ``print``; a failing
	        entry is skipped so the remaining entries are still processed.
	    """
	    for entry in har_data['log']['entries']:
	        url = entry['request']['url']
	        parsed_url = urlparse(url)

	        # "https://host/" or "https://host/docs/" names a directory. Give it
	        # a file name, otherwise the target resolves to the directory itself
	        # and the page would never be written.
	        relative_path = parsed_url.path.lstrip('/')
	        if not relative_path or relative_path.endswith('/'):
	            relative_path += 'index.html'

	        # Collapse "." and ".." segments, then refuse any target that would
	        # land outside the current directory: HAR contents are untrusted.
	        file_path = os.path.normpath(
	            os.path.join(parsed_url.netloc, relative_path)
	        )
	        if os.path.isabs(file_path) or file_path.split(os.sep, 1)[0] == os.pardir:
	            print(f"Skipping (unsafe path): {url}")
	            continue

	        # Check if file already exists
	        if os.path.exists(file_path):
	            print(f"Skipping (file exists): {file_path}")
	            continue

	        # Get the file content
	        content_info = entry['response']['content']
	        content = content_info.get('text')

	        if content:
	            if content_info.get('encoding') == 'base64':
	                try:
	                    file_content = base64.b64decode(content)
	                except ValueError as e:
	                    # binascii.Error (bad padding etc.) is a ValueError.
	                    print(f"Skipping (invalid base64): {url}: {e}")
	                    continue
	            else:
	                file_content = content.encode('utf-8')
	        else:
	            print(f"Fetching content for: {url}")
	            file_content = fetch_url(url)
	            if file_content is None:
	                print(f"Skipping (fetch failed): {url}")
	                continue

	        # Create the directory only once there is something to write, and
	        # inside the try so that a file/directory name clash (e.g. "/a" saved
	        # as a file, then "/a/b" requested) skips this entry instead of
	        # aborting the whole run.
	        try:
	            directory = os.path.dirname(file_path)
	            if directory:  # os.makedirs('') raises FileNotFoundError
	                os.makedirs(directory, exist_ok=True)
	            with open(file_path, 'wb') as out:
	                out.write(file_content)
	            print(f"Extracted: {file_path}")
	        except OSError as e:
	            print(f"Error writing file {file_path}: {e}")

	if __name__ == "__main__":
	    # Read HAR data (JSON) from stdin and extract every entry it lists.
	    har_data = json.load(sys.stdin)
	    extract_files_from_har(har_data)