#!/usr/bin/env python3
'''
Get the excellent GoLinkFinder tool via github.com/0xsha/GoLinkFinder
... based on my boy GerbenJavado's LinkFinder: https://github.com/GerbenJavado/LinkFinder

Runs GoLinkFinder against a gang of URLs, then parses the combined output into
a clean wordlist, in-scope URLs, and external URLs.

Use this like:
    python3 golinkfinderx.py urls.txt
'''
import sys
import re
import subprocess
from urllib.parse import urlparse

def load_urls(urls_file):
    urls = []
    domains = set()
    try:
        with open(urls_file, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                urls.append(line)
                if line.startswith(('http://', 'https://')):
                    # Storing the domain for scope-matching later - just trust me
                    parsed = urlparse(line)
                    domains.add(parsed.netloc.lower())
                else:
                    # Bare lines get treated as domains even when they aren't,
                    # and get an https:// prefix so GoLinkFinder can fetch them.
                    domains.add(line.lower())
                    urls[-1] = 'https://' + line
    except FileNotFoundError:
        print(f"Error: {urls_file} not found")
        sys.exit(1)
    return urls, domains

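# Illustrative example (hypothetical input file): a urls.txt containing
#     https://example.com/login
#     testsite.org
# yields urls == ['https://example.com/login', 'https://testsite.org']
# and domains == {'example.com', 'testsite.org'}.
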
def run_golinkfinder(urls):
    all_output = []
    print(f"Running GoLinkFinder on {len(urls)} URLs...")
    for i, url in enumerate(urls, 1):
        print(f"[{i}/{len(urls)}] Processing {url}")
        try:
            result = subprocess.run(
                ['GoLinkFinder', '-d', url],
                capture_output=True,
                text=True,
                timeout=15
            )
            if result.returncode == 0:
                # Count only non-empty lines so the tally matches what we keep
                lines = [line.strip() for line in result.stdout.splitlines() if line.strip()]
                all_output.extend(lines)
                print(f" ✓ Found {len(lines)} links")
            else:
                print(f" ✗ R.I.P to {url}: {result.stderr.strip()}")
        except subprocess.TimeoutExpired:
            print(f" ✗ Timeout processing {url}")
        except FileNotFoundError:
            print("Error: GoLinkFinder not even here, bro. Install it first:")
            print("  go install github.com/0xsha/GoLinkFinder@latest")
            sys.exit(1)
        except Exception as e:
            print(f" ✗ Critical Death-Error on {url}: {e}")
    print(f"\nTotal links collected: {len(all_output)}")
    return all_output

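# Assumption: `GoLinkFinder -d <url>` prints one discovered endpoint per line
# on stdout (absolute URLs and relative paths mixed together); the parsing
# below is written against that line-oriented format.
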
def clean_path(path):
    # Cleaning up the wordlist entries the way that I like them:
    # strip any leading '.', '/', '\' so everything ends up relative.
    if not path:
        return ""
    cleaned = re.sub(r'^[./\\]+', '', path)
    return cleaned

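# A few illustrative (hypothetical) inputs:
#     clean_path('./assets/app.js')   -> 'assets/app.js'
#     clean_path('/api/v1/users')     -> 'api/v1/users'
#     clean_path('../../etc/passwd')  -> 'etc/passwd'
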
def extract_path_from_url(url):
    # Reduce a full URL to its path (plus query/fragment) for the wordlist
    try:
        parsed = urlparse(url)
        path = parsed.path
        if parsed.query:
            path += '?' + parsed.query
        if parsed.fragment:
            path += '#' + parsed.fragment
        return clean_path(path)
    except Exception:
        return ""

def is_valid_url(line):
    return line.startswith(('http://', 'https://'))

def is_path(line):
    # Relative/absolute paths, or bare tokens that at least look path-ish
    return (line.startswith(('/', './', '../')) or
            (not line.startswith(('http://', 'https://')) and
             ('.' in line or '/' in line)))

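# Sanity examples (hypothetical inputs):
#     is_path('/admin')         -> True   (leading slash)
#     is_path('logo.png')       -> True   (contains a dot, not a URL)
#     is_path('https://x.com')  -> False  (handled by is_valid_url instead)
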
def parse_golinkfinder_output(output_lines, target_domains):
    wordlist = set()
    external_urls = set()
    domain_urls = set()
    for line_num, line in enumerate(output_lines, 1):
        line = line.strip()
        if not line:
            continue
        # Remove content that is 99.9% likely to suck, i.e. dates, JS noise
        if re.match(r'^\d{1,2}/\d{1,2}/\d{4}$', line):
            continue
        if is_valid_url(line):
            try:
                parsed = urlparse(line)
                domain = parsed.netloc.lower()
                if domain in target_domains:
                    domain_urls.add(line)
                    path = extract_path_from_url(line)
                    if path:
                        wordlist.add(path)
                else:
                    external_urls.add(line)
            except Exception as e:
                print(f"Error parsing URL on line {line_num}: {line} - {e}", file=sys.stderr)
        elif is_path(line):
            cleaned_path = clean_path(line)
            if cleaned_path:
                wordlist.add(cleaned_path)
        else:
            # Last resort: keep anything vaguely path-like that isn't pure digits
            if any(char in line for char in ['/', '.', '-', '_']) and not line.isdigit():
                cleaned_path = clean_path(line)
                if cleaned_path:
                    wordlist.add(cleaned_path)
    return wordlist, external_urls, domain_urls

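# How lines get bucketed (hypothetical lines, target_domains == {'example.com'}):
#     https://example.com/api/users  -> domain_urls, plus 'api/users' in wordlist
#     https://cdn.vendor.net/lib.js  -> external_urls
#     /admin/login                   -> 'admin/login' in wordlist
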
def write_output_files(wordlist, external_urls, domain_urls, raw_output=None):
    if raw_output:
        with open('golinkfinder-raw-output.txt', 'w') as f:
            for line in raw_output:
                f.write(line + '\n')
        print(f"Wrote {len(raw_output)} raw lines to golinkfinder-raw-output.txt")
    with open('golinkfinder-wordlist.txt', 'w') as f:
        for path in sorted(wordlist):
            f.write(path + '\n')
    print(f"Wrote {len(wordlist)} paths to golinkfinder-wordlist.txt")
    with open('golinkfinder-external-urls.txt', 'w') as f:
        for url in sorted(external_urls):
            f.write(url + '\n')
    print(f"Wrote {len(external_urls)} external URLs to golinkfinder-external-urls.txt")
    with open('golinkfinder-full-urls.txt', 'w') as f:
        for url in sorted(domain_urls):
            f.write(url + '\n')
    print(f"Wrote {len(domain_urls)} domain-matching URLs to golinkfinder-full-urls.txt")

def main():
    if len(sys.argv) < 2:
        print("Usage: python3 golinkfinderx.py <urls.txt>")
        print("\nThis script will:")
        print("  1. Run GoLinkFinder on each URL in the file")
        print("  2. Collect and parse all output")
        print("  3. Create organized output files:")
        print("     - golinkfinder-wordlist.txt (cleaned paths)")
        print("     - golinkfinder-external-urls.txt (external domain URLs)")
        print("     - golinkfinder-full-urls.txt (target domain URLs)")
        print("     - golinkfinder-raw-output.txt (raw GoLinkFinder output)")
        print("\nRequires: GoLinkFinder (go install github.com/0xsha/GoLinkFinder@latest)")
        sys.exit(1)
    urls_file = sys.argv[1]
    urls, target_domains = load_urls(urls_file)
    print(f"Loaded {len(urls)} URLs with {len(target_domains)} target domains")
    print(f"Target domains: {sorted(target_domains)}")
    raw_output = run_golinkfinder(urls)
    if not raw_output:
        print("No output collected from GoLinkFinder")
        sys.exit(1)
    print("\nParsing collected output...")
    wordlist, external_urls, domain_urls = parse_golinkfinder_output(raw_output, target_domains)
    print("\nWriting output files...")
    write_output_files(wordlist, external_urls, domain_urls, raw_output)
    print("\n=== SUMMARY ===")
    print(f"URLs processed: {len(urls)}")
    print(f"Raw lines collected: {len(raw_output)}")
    print(f"Wordlist length: {len(wordlist)}")
    print(f"External URLs: {len(external_urls)}")
    print(f"In-scope URLs: {len(domain_urls)}")
    print("\nFiles created:")
    print("  - golinkfinder-raw-output.txt")
    print("  - golinkfinder-wordlist.txt")
    print("  - golinkfinder-external-urls.txt")
    print("  - golinkfinder-full-urls.txt")

if __name__ == "__main__":
    main()