Created
May 28, 2023 03:45
-
-
Save davidwu111/747bfbd14b6122f85da2fb802cf339d1 to your computer and use it in GitHub Desktop.
A quick Python script to extract all URLs, domains and domain suffixes from a HAR file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
from urllib.parse import urlparse | |
import tldextract | |
def extract_urls(har_file_path, url_file_path, domain_txt_file_path, suffix_txt_file_path):
    """Read a HAR capture and write three reports derived from its request URLs.

    Parameters
    ----------
    har_file_path : str
        Path of the HAR (JSON) file to read.
    url_file_path : str
        Output file receiving every request URL, one per line, in capture order.
    domain_txt_file_path : str
        Output file receiving the unique hostnames (``urlparse(...).netloc``).
    suffix_txt_file_path : str
        Output file receiving the unique registered domains as computed by
        ``tldextract`` (e.g. ``example.co.uk``).

    Raises
    ------
    FileNotFoundError, json.JSONDecodeError, KeyError
        If the HAR file is missing, is not valid JSON, or lacks the standard
        ``log.entries[].request.url`` structure.
    """
    with open(har_file_path, 'r', encoding='utf-8') as f:
        har_data = json.load(f)

    urls = []
    domains = set()
    registered_domains = set()
    for entry in har_data['log']['entries']:
        url = entry['request']['url']
        urls.append(url)
        domains.add(urlparse(url).netloc)
        # Use explicit attributes instead of slicing the ExtractResult
        # (`ext[1:]`): newer tldextract versions add extra fields such as
        # `is_private`, which a positional slice would wrongly join into
        # the registered-domain string.
        ext = tldextract.extract(url)
        registered_domains.add('.'.join(part for part in (ext.domain, ext.suffix) if part))

    with open(url_file_path, 'w', encoding='utf-8') as f:
        f.writelines(u + '\n' for u in urls)
    # Sort the set-derived reports so the output files are deterministic
    # across runs (set iteration order is arbitrary).
    with open(domain_txt_file_path, 'w', encoding='utf-8') as f:
        f.writelines(d + '\n' for d in sorted(domains))
    with open(suffix_txt_file_path, 'w', encoding='utf-8') as f:
        f.writelines(d + '\n' for d in sorted(registered_domains))
# Replace path/to/your/har/file.har below with the path to your HAR file.
# The __main__ guard keeps the module importable without running the export.
if __name__ == "__main__":
    extract_urls("path/to/your/har/file.har", "urls.txt", "domains.txt", "domain-suffixes.txt")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment