#!/usr/bin/env python3
'''
Get the excellent GoLinkFinder tool via github.com/0xsha/GoLinkFinder
... based on my boy here: https://github.com/GerbenJavado/LinkFinder
Anyways, this gives clean, parsed output after running GoLinkFinder on a gang of URLs.
Use it like:
python3 golinkfinderx.py urls.txt
'''
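# Example urls.txt (illustrative - one URL or domain per line; bare domains
# get https:// prepended by load_urls() below):
#   https://example.com
#   http://sub.example.com/app
#   example.org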
import sys
import re
import subprocess
from urllib.parse import urlparse
def load_urls(urls_file):
    urls = []
    domains = set()
    try:
        with open(urls_file, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                # Normalize bare domains to https:// so urlparse gets a real netloc
                if not line.startswith(('http://', 'https://')):
                    line = 'https://' + line
                urls.append(line)
                # Storing the domain for scope-matching later - just trust me
                domains.add(urlparse(line).netloc.lower())
    except FileNotFoundError:
        print(f"Error: {urls_file} not found")
        sys.exit(1)
    return urls, domains
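# Illustrative result for the example urls.txt above: load_urls() would return
# (['https://example.com', 'http://sub.example.com/app', 'https://example.org'],
#  {'example.com', 'sub.example.com', 'example.org'})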
def run_golinkfinder(urls):
    all_output = []
    print(f"Running GoLinkFinder on {len(urls)} URLs...")
    for i, url in enumerate(urls, 1):
        print(f"[{i}/{len(urls)}] Processing {url}")
        try:
            result = subprocess.run(
                ['GoLinkFinder', '-d', url],
                capture_output=True,
                text=True,
                timeout=15
            )
            if result.returncode == 0:
                # Keep only non-empty lines so the count isn't padded by blanks
                links = [line.strip() for line in result.stdout.split('\n') if line.strip()]
                all_output.extend(links)
                print(f" ✓ Found {len(links)} links")
            else:
                print(f" ✗ R.I.P. to {url}: {result.stderr.strip()}")
        except subprocess.TimeoutExpired:
            print(f" ✗ Timeout processing {url}")
        except FileNotFoundError:
            print("Error: GoLinkFinder not even here, bro, what are you even doing?")
            print("go install github.com/003random/GoLinkFinder@latest")
            sys.exit(1)
        except Exception as e:
            print(f" ✗ Critical Death-Error when encountering {url}: {e}")
    print(f"\nTotal links collected: {len(all_output)}")
    return all_output
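# Raw GoLinkFinder output lines tend to look like the below (shape varies per
# target; these sample values are illustrative, not captured output):
#   https://example.com/static/js/app.js
#   /api/v1/users
#   ./assets/logo.png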
def clean_path(path):
    # Cleaning up the wordlist entries the way that I like them
    if not path:
        return ""
    # The regex already eats leading '/', '.', and '\' runs, so no extra lstrip needed
    return re.sub(r'^[./\\]+', '', path)
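# Examples (illustrative):
#   clean_path('./assets/js/app.js') -> 'assets/js/app.js'
#   clean_path('//cdn/fonts.css')    -> 'cdn/fonts.css'
#   clean_path('api/v2/login')       -> 'api/v2/login'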
def extract_path_from_url(url):
    try:
        parsed = urlparse(url)
        path = parsed.path
        if parsed.query:
            path += '?' + parsed.query
        if parsed.fragment:
            path += '#' + parsed.fragment
        return clean_path(path)
    except Exception:
        return ""
def is_valid_url(line):
    return line.startswith(('http://', 'https://'))
def is_path(line):
    return (line.startswith(('/', './', '../')) or
            (not line.startswith(('http://', 'https://')) and
             ('.' in line or '/' in line)))
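# Classification examples (illustrative):
#   is_path('/admin/login')          -> True  (leading slash)
#   is_path('config.json')           -> True  (no scheme, contains a dot)
#   is_path('https://example.com/x') -> False (caught by is_valid_url instead)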
def parse_golinkfinder_output(output_lines, target_domains):
    wordlist = set()
    external_urls = set()
    domain_urls = set()
    for line_num, line in enumerate(output_lines, 1):
        line = line.strip()
        if not line:
            continue
        # Drop content that is 99.9% likely to suck, e.g. bare dates
        if re.match(r'^\d{1,2}/\d{1,2}/\d{4}$', line):
            continue
        if is_valid_url(line):
            try:
                parsed = urlparse(line)
                domain = parsed.netloc.lower()
                if domain in target_domains:
                    domain_urls.add(line)
                    path = extract_path_from_url(line)
                    if path:
                        wordlist.add(path)
                else:
                    external_urls.add(line)
            except Exception as e:
                print(f"Error parsing URL on line {line_num}: {line} - {e}", file=sys.stderr)
        elif is_path(line):
            cleaned_path = clean_path(line)
            if cleaned_path:
                wordlist.add(cleaned_path)
        else:
            if any(char in line for char in ['/', '.', '-', '_']) and not line.isdigit():
                cleaned_path = clean_path(line)
                if cleaned_path:
                    wordlist.add(cleaned_path)
    return wordlist, external_urls, domain_urls
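# Illustrative pass, assuming target_domains == {'example.com'}:
#   'https://example.com/api/v1'  -> domain_urls, plus 'api/v1' into wordlist
#   'https://cdn.thirdparty.io/x' -> external_urls
#   './js/main.js'                -> wordlist as 'js/main.js'
#   '12/25/2024'                  -> skipped by the date filter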
def write_output_files(wordlist, external_urls, domain_urls, raw_output=None):
    if raw_output:
        with open('golinkfinder-raw-output.txt', 'w') as f:
            for line in raw_output:
                f.write(line + '\n')
        print(f"Wrote {len(raw_output)} raw lines to golinkfinder-raw-output.txt")
    with open('golinkfinder-wordlist.txt', 'w') as f:
        for path in sorted(wordlist):
            f.write(path + '\n')
    print(f"Wrote {len(wordlist)} paths to golinkfinder-wordlist.txt")
    with open('golinkfinder-external-urls.txt', 'w') as f:
        for url in sorted(external_urls):
            f.write(url + '\n')
    print(f"Wrote {len(external_urls)} external URLs to golinkfinder-external-urls.txt")
    with open('golinkfinder-full-urls.txt', 'w') as f:
        for url in sorted(domain_urls):
            f.write(url + '\n')
    print(f"Wrote {len(domain_urls)} domain-matching URLs to golinkfinder-full-urls.txt")
def main():
    if len(sys.argv) < 2:
        print("Usage: python3 golinkfinderx.py <urls.txt>")
        print("\nThis script will:")
        print(" 1. Run GoLinkFinder on each URL in the file")
        print(" 2. Collect and parse all output")
        print(" 3. Create organized output files:")
        print("    - golinkfinder-wordlist.txt (cleaned paths)")
        print("    - golinkfinder-external-urls.txt (external domain URLs)")
        print("    - golinkfinder-full-urls.txt (target domain URLs)")
        print("    - golinkfinder-raw-output.txt (raw GoLinkFinder output)")
        print("\nRequires: GoLinkFinder (go install github.com/003random/GoLinkFinder@latest)")
        sys.exit(1)
    urls_file = sys.argv[1]
    urls, target_domains = load_urls(urls_file)
    print(f"Loaded {len(urls)} URLs with {len(target_domains)} target domains")
    print(f"Target domains: {sorted(target_domains)}")
    raw_output = run_golinkfinder(urls)
    if not raw_output:
        print("No output collected from GoLinkFinder")
        sys.exit(1)
    print("\nParsing collected output...")
    wordlist, external_urls, domain_urls = parse_golinkfinder_output(raw_output, target_domains)
    print("\nWriting output files...")
    write_output_files(wordlist, external_urls, domain_urls, raw_output)
    print("\n=== SUMMARY ===")
    print(f"URLs processed: {len(urls)}")
    print(f"Raw lines collected: {len(raw_output)}")
    print(f"Wordlist length: {len(wordlist)}")
    print(f"External URLs: {len(external_urls)}")
    print(f"In-scope URLs: {len(domain_urls)}")
    print("\nFiles created:")
    print(" - golinkfinder-raw-output.txt")
    print(" - golinkfinder-wordlist.txt")
    print(" - golinkfinder-external-urls.txt")
    print(" - golinkfinder-full-urls.txt")
if __name__ == "__main__":
    main()
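# Example session (illustrative; assumes GoLinkFinder is on your PATH):
#   $ go install github.com/003random/GoLinkFinder@latest
#   $ printf 'https://example.com\n' > urls.txt
#   $ python3 golinkfinderx.py urls.txt
# Afterwards, feed golinkfinder-wordlist.txt to your fuzzer of choice
# (ffuf, feroxbuster, etc.) against the in-scope hosts.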