#!/usr/bin/env python3
'''
Greetings bug-slaying brothers of the pythonian blood. This script takes my httpx output after slamming in
a bunch of subdomains and organizes it so it's a bit easier to read and work with.

The httpx command I run first is:
httpx -sc -cl -title -bp -server -td -ip -cname -asn -cdn -vhost -fhr | anew httpx-quicc

This script groups the results by status code, then sorts each group by content length from smallest to largest.

Output looks like:
#200s
https://goodurl.com/ [18]
https://based.goodurl.com [1049]
#301s
https://redir.based.goodurl.com [223]
and so on...
'''
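
# Usage (the name 'sort_httpx.py' below is just a placeholder for whatever
# you saved this script as):
#   python3 sort_httpx.py httpx-quicc
#   cat httpx-quicc | python3 sort_httpx.py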
import sys
import re
from collections import defaultdict


def strip_ansi(text):
    # ANSI color codes made parsing this total hell until I removed them like so
    ansi_escape = re.compile(r'\x1b\[[0-9;]*m')
    return ansi_escape.sub('', text)
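
# A minimal sanity check for the regex above (assumed example; httpx wraps
# status codes in ANSI color escapes when writing to a terminal):
#   >>> strip_ansi('\x1b[32m[200]\x1b[0m')
#   '[200]'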

def parse_httpx_line(line):
    line = line.strip()
    if not line or not line.startswith(('http://', 'https://')):
        return None
    clean_line = line  # fallback so the except block always has something to print
    try:
        clean_line = strip_ansi(line)
        # annihilating these script breaking nasty output chunks:
        # split the URL off from the bracketed fields that follow it
        parts = clean_line.split(' ', 1)
        if len(parts) < 2:
            return None
        url = parts[0]
        rest = parts[1]
        # bracket extraction ceremony
        brackets = re.findall(r'\[([^\]]*)\]', rest)
        if len(brackets) < 2:
            return None
        # first bracket is the status code, second is the content length
        status_raw = brackets[0]
        try:
            content_length = int(brackets[1])
        except ValueError:
            return None
        # some lines have [301, 302] for the status code (redirect chains), which is DEALT WITH below
        if ',' in status_raw:
            primary_status = status_raw.split(',')[0]
        else:
            primary_status = status_raw
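        # (assumed example of that chain format: '301, 302'.split(',')[0] -> '301')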
        # some of httpx's returned status codes are very creative and abstract
        if not primary_status.isdigit():
            return None
        return {
            'url': url,
            'status': primary_status,
            'content_length': content_length,
            'raw_status': status_raw
        }
    except Exception as e:
        print(f"We in trouble: {clean_line[:50]}... - {e}", file=sys.stderr)
        return None
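
# Example round trip (the input line is made up, but it matches the shape
# httpx emits with -sc and -cl enabled):
#   >>> parse_httpx_line('https://goodurl.com [200] [1049] [Example Title]')
#   {'url': 'https://goodurl.com', 'status': '200', 'content_length': 1049, 'raw_status': '200'}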

def main():
    try:
        # you can pipe your httpx output in, or pass the file as an argument
        if len(sys.argv) > 1:
            with open(sys.argv[1], 'r', encoding='utf-8', errors='ignore') as f:
                lines = f.readlines()
        else:
            lines = sys.stdin.readlines()

        status_groups = defaultdict(list)
        for line in lines:
            parsed = parse_httpx_line(line)
            if parsed:
                status_groups[parsed['status']].append(parsed)

        if not status_groups:
            print("Your data be looking sus and fried. try again. please.", file=sys.stderr)
            return

        # genius level lambda sorting magic: status codes numerically,
        # then each group by content length
        for status in sorted(status_groups.keys(), key=int):
            entries = sorted(status_groups[status], key=lambda x: x['content_length'])
            print(f"#{status}s")
            for entry in entries:
                print(f"{entry['url']} [{entry['content_length']}]")
            print()
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()