Last active
December 1, 2025 04:17
-
-
Save nberlette/4dd32080c744c9c8100e09e092ec4ff8 to your computer and use it in GitHub Desktop.
dictionary-based bulk domain hack generator CLI
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S deno run -Aq --unstable-kv | |
| //! Domain Hack Generator and Availability Checker | |
| //! ----------------------------------------------------------------------- | |
| //! Copyright (c) 2025 Nicholas Berlette. All rights reserved. MIT License. | |
| //! ----------------------------------------------------------------------- | |
| // deno-lint-ignore-file no-explicit-any | |
| /** | |
| * @module domains | |
| * | |
| * Domain Hack Generator and Availability Checker | |
| * | |
| * This script compiles lists of potential domain “hacks” for a given set of | |
| * TLD or ccTLD extensions. A domain hack is created by splitting a word so | |
| * that the last characters of the word become the TLD. For example, the | |
| * English word "bakers" can form the hack `bake.rs` using the `.rs` TLD. | |
| * | |
| * The script supports three sources of candidate words: | |
| * 1. A local word list on disk (one word per line). | |
| * 2. The Merriam‑Webster Word Finder website, which lists common words | |
| * ending in a particular suffix. When using this source the script | |
| * performs a simple HTML parse to extract the words on each page. | |
| * 3. A filter list or topics list that can restrict the candidate space to | |
| * specific prefixes or words (e.g. tech‑related terms) to reduce | |
| * unnecessary network checks. | |
| * | |
| * Words are first pre‑filtered to weed out obvious junk: prefixes without | |
| * vowels, long consonant clusters, repeated characters and other unlikely | |
| * sequences are removed early on. Candidate words can also be filtered to | |
| * include only those that contain certain topics or appear in a provided | |
| * whitelist. Once candidates are selected, a heuristic scoring function | |
| * ranks them and the script checks availability via `whois` or RDAP. | |
| */ | |
| import fs from "node:fs/promises"; | |
| import { execFile } from "node:child_process"; | |
| import { parseArgs } from "node:util"; | |
/**
 * Read the file at `path` and return its entire contents decoded as UTF-8.
 *
 * @param path Location of the file on disk.
 * @returns The decoded text of the file.
 */
async function readFile(path: string): Promise<string> {
  const contents = await fs.readFile(path, { encoding: "utf8" });
  return contents;
}
/**
 * Write `data` to the file at `path`, encoding string data as UTF-8.
 *
 * @param path Destination file; it is created or overwritten.
 * @param data Anything accepted as the payload of `fs.writeFile`.
 */
async function writeFile(
  path: string,
  data: Parameters<typeof fs.writeFile>[1],
): Promise<void> {
  return fs.writeFile(path, data, { encoding: "utf-8" });
}
/**
 * Runtime settings for one invocation of the generator, as assembled from the
 * command-line flags.
 */
export interface Config {
  /** TLD / ccTLD suffixes to build domain hacks for. */
  tlds: string[];
  /** Lower length bound applied to candidates. */
  minLength: number;
  /** Upper length bound applied to candidate words. */
  maxLength: number;
  /** Maximum number of candidates kept per TLD. */
  limit: number;
  /** When true, no word lists are fetched over the network. */
  offline: boolean;
  /** Number of days an availability result stays cached. */
  cacheDays: number;
  /** Path of a newline-delimited word list on disk. */
  wordlist?: string;
  /** Path of a file listing the only prefixes that may be used. */
  filterlist?: string;
  /** Terms of which at least one must occur inside a prefix. */
  topics?: string[];
}
/**
 * Default set of topics used when the user does not supply any via --topics.
 * These terms bias the script toward tech‑adjacent words (e.g. app, dev, api).
 *
 * Topics are matched as substrings of a candidate prefix, so every entry must
 * be lower-case. Each term is listed exactly once (the original list carried
 * "data" twice).
 */
const DEFAULT_TOPICS = [
  "app", "dev", "tech", "code", "data", "net", "web", "api", "cloud", "crypto",
  "ai", "bio", "bot", "cyber", "hub", "blog", "tools", "kit", "script", "ops",
  "advocate", "engineer", "maker", "hack", "design", "ui", "ux", "ci", "cd", "vm",
  "os", "pipeline", "stack", "admin", "byte", "bit", "system", "network", "infra",
  "virtual", "machine", "digital", "turing", "turbo", "quantum", "qubit", "matrix",
  "vector", "tensor", "array", "loop", "function", "object", "class", "method",
  "property", "variable", "struct", "type", "union", "interface", "enum",
  "constant", "const", "static", "dynamic", "global", "local", "thread", "atomic",
  "parallel", "distro", "deploy", "buffer", "blob", "ssh", "ftp", "http", "https",
  "sql", "git", "repo", "commit", "branch", "merge", "vcs", "pr", "pull", "issue",
  "rebase", "channel", "stream", "packet",
];
/**
 * Pattern used to extract words from the Merriam‑Webster Word Finder pages.
 * Results on those pages appear as `<li><a class="badge …">word</a></li>`.
 * The `className` group captures the anchor's class attribute and the `word`
 * group captures the anchor's inner HTML.
 *
 * `<li\b` (rather than `<li\s*`) keeps the pattern from also matching other
 * tags whose name merely starts with "li", such as `<link>`.
 */
const WORD_REGEX =
  /<li\b[^>]*>\s*<a\s+[^>]*class="(?<className>[^"]+)"[^>]*>(?<word>.+?)<\/a>/gi;

/**
 * Fetch a list of words from the Merriam‑Webster Word Finder. Failures are
 * logged with `console.warn` and never thrown: a non-2xx response or a
 * network error yields whatever was collected so far (normally an empty
 * array).
 *
 * @param suffix The word ending (without leading dot). For domain hacks this
 * corresponds to the TLD. It is URL-encoded before being placed in the path.
 * @param length The number of letters in the full word. The value is
 * truncated to an integer and made positive; `0` or a non-numeric value falls
 * back to `6`.
 * @param page The page index (1‑based). Additional pages may contain more
 * results for longer suffixes.
 * @param common When true fetch common words only; otherwise fetch all words.
 * @returns Lower-cased words found on the page, in document order.
 */
async function fetchWordsFromMerriam(
  suffix: string,
  length: number,
  page: number,
  common = true,
): Promise<string[]> {
  const words: string[] = [];
  const pathLength = (Math.abs(+length | 0) || 6).toString();
  const scope = common ? "common" : "all";
  const url =
    `https://www.merriam-webster.com/wordfinder/classic/ends/${scope}/${pathLength}/${encodeURIComponent(suffix)}/${page}`;
  try {
    const res = await fetch(url, {
      headers: {
        "User-Agent":
          "Mozilla/5.0 (compatible; DomainHack/1.0; +https://example.com)",
        "Accept":
          "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
      },
    });
    if (!res.ok) {
      console.warn(`Failed to fetch ${url}: ${res.status}`);
      // Release the unread body so the connection is not left dangling.
      await res.body?.cancel().catch(() => {});
      return words;
    }
    const html = await res.text();
    // matchAll iterates over a private copy of the regex, so the shared
    // pattern's lastIndex is never mutated by this function.
    for (const match of html.matchAll(WORD_REGEX)) {
      const classList = (match.groups?.className ?? "").split(/\s+/);
      if (!classList.includes("badge")) continue;
      // The capture is inner HTML; drop nested tags so only the text remains.
      const word = (match.groups?.word ?? "")
        .replace(/<[^>]*>/g, "")
        .trim()
        .toLowerCase();
      if (word) words.push(word);
    }
  } catch (err) {
    console.warn(`Error fetching ${url}:`, err);
  }
  return words;
}
/**
 * Read an optional filter list from disk. Each line holds one word or prefix;
 * entries are trimmed and lower-cased, and blank lines as well as lines that
 * start with `#` are skipped.
 *
 * @param path File to read. When omitted (or empty) no file is touched.
 * @returns The set of entries. It is empty when no path was given or when
 * the file could not be read (a warning is logged in the latter case).
 */
async function loadFilterList(path?: string): Promise<Set<string>> {
  const entries = new Set<string>();
  if (path) {
    try {
      const contents = await readFile(path);
      contents
        .split(/\r?\n/)
        .map((rawLine) => rawLine.trim().toLowerCase())
        .filter((entry) => entry !== "" && !entry.startsWith("#"))
        .forEach((entry) => entries.add(entry));
    } catch (err) {
      console.warn(`Failed to load filter list ${path}:`, err);
    }
  }
  return entries;
}
/**
 * Weed out words that cannot produce a readable domain hack for `tld`.
 *
 * A word survives only if it ends in the (lower-cased) TLD, leaves a
 * non-empty prefix in front of it, and that prefix
 *   - is not shorter than `cfg.minLength`,
 *   - consists solely of the letters a–z,
 *   - contains a vowel,
 *   - has no run of four or more non-vowels,
 *   - has no character repeated three times in a row,
 *   - has no `q` that is not followed by `u`,
 *   - is present in `allowlist` (only checked when the allowlist is non-empty),
 *   - contains at least one topic term. The topics come from `cfg.topics`, or
 *     from `DEFAULT_TOPICS` when none are configured.
 *
 * @returns The surviving words (whole words, not prefixes) in input order.
 */
function filterWords(
  words: string[],
  tld: string,
  cfg: Config,
  allowlist: Set<string>,
): string[] {
  const suffix = tld.toLowerCase();
  const topics = cfg.topics?.length ? cfg.topics : DEFAULT_TOPICS;

  // Shape checks that depend on nothing but the prefix itself.
  const looksPronounceable = (prefix: string): boolean =>
    !(prefix.length < cfg.minLength) &&
    /^[a-z]+$/.test(prefix) &&
    /[aeiou]/.test(prefix) &&
    !/[^aeiou]{4,}/.test(prefix) &&
    !/(.)\1\1/.test(prefix) &&
    !/q(?!u)/.test(prefix);

  const isAllowed = (prefix: string): boolean =>
    allowlist.size === 0 || allowlist.has(prefix);

  const mentionsTopic = (prefix: string): boolean =>
    topics.length === 0 || topics.some((term) => prefix.includes(term));

  return words.filter((word) => {
    if (!word.endsWith(suffix) || word.length <= suffix.length) return false;
    const prefix = word.slice(0, -suffix.length);
    return looksPronounceable(prefix) && isAllowed(prefix) && mentionsTopic(prefix);
  });
}
/**
 * Load words either from a local file (newline delimited) or, if no file is
 * provided, by scraping Merriam‑Webster. When scraping the remote site this
 * function iterates over the word lengths `cfg.minLength` … `cfg.maxLength`
 * and, for each length, fetches up to three pages, stopping at the first page
 * that yields no words.
 *
 * The TLD is normalised once (trimmed, lower-cased, leading dot removed) and
 * that normalised suffix is used both for scraping and for the final filter.
 * Words are collected into a `Set` one at a time: spreading a large word list
 * into `Array.prototype.push` would pass every word as a call argument and
 * can exceed the engine's argument limit.
 *
 * @param cfg Active configuration (`wordlist`, `offline`, `minLength` and
 * `maxLength` are read).
 * @param tld The TLD to collect words for, with or without a leading dot.
 * @returns Unique lower-cased words that end in the TLD, are longer than
 * `cfg.minLength` characters and at most `cfg.maxLength` (capped at 100)
 * characters long. Empty when offline mode is requested without a word list.
 * @throws Propagates the read error when `cfg.wordlist` cannot be read.
 */
async function loadCandidateWords(cfg: Config, tld: string): Promise<string[]> {
  const suffix = tld.toLowerCase().trim().replace(/^\./, "");
  const unique = new Set<string>();

  if (cfg.wordlist) {
    const data = await readFile(cfg.wordlist);
    for (const line of data.split(/[\r\n]+/)) {
      const word = line.trim().normalize("NFKC").toLowerCase();
      if (word) unique.add(word);
    }
  } else if (cfg.offline) {
    console.error(
      `Offline mode set but no word list provided. Please supply --wordlist to use offline mode.`,
    );
    return [];
  } else {
    for (let len = cfg.minLength; len <= cfg.maxLength; len++) {
      for (let page = 1; page <= 3; page++) {
        const pageWords = await fetchWordsFromMerriam(suffix, len, page, true);
        if (pageWords.length === 0) break;
        for (const word of pageWords) unique.add(word);
      }
    }
  }

  // Bounds are computed once instead of once per word.
  const minLen = Math.max(0, +cfg.minLength || 0);
  const maxLen = Math.min(100, +cfg.maxLength || 100);
  return [...unique].filter(
    (w) => w.length > minLen && w.length <= maxLen && w.endsWith(suffix),
  );
}
/**
 * Heuristic desirability score for a domain hack; smaller is better.
 *
 * The score starts at the prefix length and is then adjusted:
 *   +2    if the prefix has three or more non-vowels in a row
 *   +1    if a character occurs three times in a row
 *   ±x    per letter, from a weight table (a bonus for most letters, a
 *         penalty for x, j, z and q)
 *   −0.3  per vowel
 *   −0.5  if the prefix ends in one of a, e, i, o, u, n, l, r
 *   +0.5  per character of the TLD
 *
 * @param prefix The part of the word in front of the TLD.
 * @param tld The TLD; only its length is used.
 */
export function scoreHack(prefix: string, tld: string): number {
  // Letters grouped by the weight they contribute.
  const tiers: Array<[string, number]> = [
    ["etaoi", -0.5],
    ["nsr", -0.4],
    ["hl", -0.3],
    ["dcumf", -0.2],
    ["wgypbvk", -0.1],
    ["xjz", 0.5],
    ["q", 0.7],
  ];
  const weightOf = new Map<string, number>();
  for (const [letters, weight] of tiers) {
    for (const letter of letters) weightOf.set(letter, weight);
  }

  let total = 0;
  total += prefix.length;
  if (/[^aeiou]{3,}/.test(prefix)) total += 2;
  if (/(.)\1\1/.test(prefix)) total += 1;

  // Fold the per-letter weights into the running total, left to right.
  total = [...prefix].reduce((sum, ch) => sum + (weightOf.get(ch) ?? 0), total);

  const vowelCount = prefix.match(/[aeiou]/g)?.length ?? 0;
  total -= vowelCount * 0.3;
  if (/[aeiounlr]$/.test(prefix)) total -= 0.5;
  total += tld.length * 0.5;
  return total;
}
/**
 * Phrases that WHOIS servers commonly print for an unregistered domain.
 * They are compared against the lower-cased command output.
 */
const WHOIS_AVAILABLE_PHRASES = [
  "no match",
  "no data found",
  "no entries found",
  "domain not found",
  "is not registered",
  "registration status: available",
  "available for registration",
  "domain you requested is not",
];

/**
 * Determine whether a domain appears to be available by running the `whois`
 * command. Because output formats vary widely across TLDs this function
 * checks the output for a handful of common "not registered" phrases.
 *
 * @param domain The domain name to look up. It is passed as a single
 * argument to `whois` without going through a shell.
 * @param timeout Maximum time in milliseconds to wait for the command.
 * @returns `true` when the output contains one of the known phrases, `false`
 * when the command succeeded without any of them, and `undefined` when the
 * result is inconclusive: the command could not be started, reported an
 * error, or did not finish within `timeout`.
 */
export function whois(domain: string, timeout = 10_000): Promise<boolean | undefined> {
  return new Promise((resolve) => {
    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      const proc = execFile("whois", [domain], { timeout }, (err, stdout) => {
        // The lookup is over; without this the timer would keep the event
        // loop alive for the remainder of `timeout` after every call.
        clearTimeout(timer);
        if (err) return resolve(undefined);
        const text = String(stdout).toLowerCase();
        resolve(WHOIS_AVAILABLE_PHRASES.some((phrase) => text.includes(phrase)));
      });
      // Safety net in case the child ignores the signal sent by execFile's
      // own timeout: kill it and report an inconclusive result.
      timer = setTimeout(() => {
        try {
          proc.kill();
        } catch {
          /* ignore */
        } finally {
          resolve(undefined);
        }
      }, timeout);
    } catch {
      // execFile can throw synchronously (e.g. on invalid arguments); treat
      // that like any other failed lookup instead of rejecting.
      clearTimeout(timer);
      resolve(undefined);
    }
  });
}
/**
 * Attempt to determine domain availability via RDAP, using the rdap.org
 * lookup endpoint. Only the HTTP status is inspected:
 *
 *   - 404           → `true`  (no registration record, so presumably available)
 *   - any 2xx       → `false` (a record exists)
 *   - anything else → `undefined` (inconclusive)
 *
 * Network errors also yield `undefined`; this function never throws.
 *
 * @param domain The domain name to look up; it is URL-encoded before being
 * placed in the request path.
 */
export async function rdap(domain: string): Promise<boolean | undefined> {
  try {
    const res = await fetch(`https://rdap.org/domain/${encodeURIComponent(domain)}`);
    // The payload is not needed; release it so the connection can be reused.
    await res.body?.cancel().catch(() => {});
    if (res.status === 404) return true;
    return res.ok ? false : undefined;
  } catch {
    /* network failure: inconclusive */
  }
  return undefined;
}
/**
 * Look a domain up in the DomainsDB search API.
 *
 * @param domain The domain name to search for.
 * @returns `false` when the response lists a domain equal to `domain`
 * (compared case-insensitively), `true` when the response is successful but
 * lists no such domain, and `undefined` when the request fails, the response
 * status is not OK, or the payload cannot be processed.
 */
async function domainsDB(domain: string): Promise<boolean | undefined> {
  try {
    const query = encodeURIComponent(domain);
    const res = await fetch(`https://api.domainsdb.info/v1/domains/search?domain=${query}`);
    if (!res.ok) return undefined;

    const payload = await res.json() as any;
    const records: Array<{ domain: string | null }> = Array.isArray(payload?.domains)
      ? payload.domains
      : [];
    const wanted = domain.toLowerCase();
    const listed = records.some((record) => record.domain?.toLowerCase() === wanted);
    return !listed;
  } catch {
    return undefined;
  }
}
/**
 * Work out whether a domain name appears to be available by asking three
 * sources in order and returning the first conclusive answer:
 *
 *   1. the local `whois` command,
 *   2. RDAP,
 *   3. DomainsDB.
 *
 * A later source is only consulted when every earlier one was inconclusive.
 *
 * @returns `true` (available), `false` (taken), or `undefined` when none of
 * the sources could tell.
 */
export async function isDomainAvailable(domain: string): Promise<boolean | undefined> {
  const viaWhois = await whois(domain);
  if (viaWhois != null) return viaWhois;

  const viaRdap = await rdap(domain);
  if (viaRdap != null) return viaRdap;

  const viaDatabase = await domainsDB(domain);
  return viaDatabase ?? undefined;
}
/**
 * One candidate domain hack together with its score and, once it has been
 * checked, its availability.
 */
export interface CandidateResult {
  /** The full domain name, i.e. `prefix` + "." + `tld`. */
  domain: string;
  /** The part of the source word that precedes the TLD. */
  prefix: string;
  /** The TLD the word was split on. */
  tld: string;
  /** Heuristic score; lower values rank higher. */
  score: number;
  /** `true` = available, `false` = taken, `undefined` = not (yet) known. */
  available: boolean | undefined;
}
/**
 * Generate domain hacks for a given TLD from the supplied word list. Words
 * are pre‑filtered with `filterWords`, every surviving word is split into
 * `prefix` + `.` + TLD and scored with `scoreHack`, and the best (lowest)
 * scores are returned.
 *
 * The TLD is normalised first (trimmed, lower-cased, leading dots removed)
 * so that `rs`, `.rs` and ` RS ` all behave the same; the normalised value is
 * what gets passed to `filterWords` and stored in the results.
 *
 * @param words Candidate words, expected to be lower-case.
 * @param tld The TLD to split on, with or without a leading dot.
 * @param cfg Active configuration; `limit` caps the number of results (a
 * negative limit yields no results).
 * @param allowlist Optional prefix allowlist, forwarded to `filterWords`.
 * @returns At most `cfg.limit` candidates sorted by ascending score, each
 * with `available` still `undefined`. Empty when the TLD is blank.
 */
export function generateCandidates(
  words: string[],
  tld: string,
  cfg: Config,
  allowlist: Set<string>,
): CandidateResult[] {
  const suffix = tld.trim().toLowerCase().replace(/^\.+/, "");
  // Without a suffix there is nothing to split on.
  if (suffix === "") return [];

  const results: CandidateResult[] = filterWords(words, suffix, cfg, allowlist)
    .map((word) => {
      const prefix = word.slice(0, -suffix.length);
      return {
        domain: `${prefix}.${suffix}`,
        prefix,
        tld: suffix,
        score: scoreHack(prefix, suffix),
        available: undefined,
      };
    });

  results.sort((a, b) => a.score - b.score);
  // slice() treats a negative end as "count from the tail"; clamp it so a
  // negative limit cannot silently drop the worst entries instead.
  return results.slice(0, cfg.limit < 0 ? 0 : cfg.limit);
}
/**
 * Perform availability checks on a list of candidate domains, consulting a
 * cache first so that recently checked domains are not looked up again.
 *
 * Cache backend:
 *   - When `Deno.openKv` exists, results are stored in Deno KV under the key
 *     `["availability", domain]` with `expireIn` set to the TTL. The KV
 *     handle is closed before this function returns.
 *   - Otherwise results are kept in the JSON file `.domainhack_cache.json` in
 *     the current directory, which is read at the start and rewritten at the
 *     end. An unreadable or malformed file is treated as an empty cache.
 *
 * Only conclusive (`true` / `false`) cached results younger than `ttlMs` are
 * reused; everything else is checked again via `isDomainAvailable`. Cache
 * failures are logged and never abort a check. At most five lookups are in
 * flight at any time.
 *
 * @param candidates Candidates to check; each object's `available` field is
 * updated in place.
 * @param ttlMs Maximum age, in milliseconds, of a cached result.
 * @returns The same candidate objects in the order their checks completed,
 * which may differ from the input order.
 */
export async function checkCandidates(
  candidates: CandidateResult[],
  ttlMs: number,
): Promise<CandidateResult[]> {
  type CacheEntry = { available: boolean | undefined; checkedAt: number };

  // A cached value is usable only if it is well-formed, conclusive and fresh.
  const isFresh = (
    entry: unknown,
  ): entry is { available: boolean; checkedAt: number } => {
    if (typeof entry !== "object" || entry === null) return false;
    const { available, checkedAt } = entry as Partial<CacheEntry>;
    return typeof available === "boolean" &&
      typeof checkedAt === "number" &&
      Date.now() - checkedAt < ttlMs;
  };

  const useKv = typeof Deno !== "undefined" && typeof Deno.openKv === "function";
  let kv: any = undefined;
  let cache: Record<string, CacheEntry> = {};
  const cacheFile = ".domainhack_cache.json";
  if (useKv) {
    kv = await Deno.openKv();
  } else {
    try {
      const parsed: unknown = JSON.parse(await readFile(cacheFile));
      // Valid JSON is not necessarily an object (e.g. `null` or `[]`).
      if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
        cache = parsed as Record<string, CacheEntry>;
      }
    } catch {
      cache = {};
    }
  }

  // Read one entry from whichever backend is active; errors count as a miss.
  const lookup = async (domain: string): Promise<unknown> => {
    if (!useKv) return cache[domain];
    try {
      return (await kv.get(["availability", domain]))?.value;
    } catch (err) {
      console.warn(`Failed to read cache entry for ${domain}:`, err);
      return undefined;
    }
  };

  // Store one entry in whichever backend is active; errors are non-fatal.
  const store = async (domain: string, entry: CacheEntry): Promise<void> => {
    if (!useKv) {
      cache[domain] = entry;
      return;
    }
    try {
      await kv.set(["availability", domain], entry, { expireIn: ttlMs });
    } catch (err) {
      console.warn(`Failed to write cache entry for ${domain}:`, err);
    }
  };

  const concurrency = 5;
  let next = 0;
  const checked: CandidateResult[] = [];

  // Each worker repeatedly claims the next unprocessed index. Claiming is
  // synchronous, so two workers can never take the same candidate.
  const worker = async (): Promise<void> => {
    while (next < candidates.length) {
      const candidate = candidates[next++];
      const cached = await lookup(candidate.domain);
      if (isFresh(cached)) {
        candidate.available = cached.available;
      } else {
        const available = await isDomainAvailable(candidate.domain);
        candidate.available = available;
        await store(candidate.domain, { available, checkedAt: Date.now() });
      }
      checked.push(candidate);
    }
  };

  try {
    const workers = Array.from({ length: concurrency }, () => worker());
    // Wait for every worker to stop before the KV handle is closed, then
    // surface the first failure, if any.
    const outcomes = await Promise.allSettled(workers);
    for (const outcome of outcomes) {
      if (outcome.status === "rejected") throw outcome.reason;
    }
  } finally {
    if (useKv) {
      try {
        kv.close();
      } catch {
        /* ignore */
      }
    }
  }

  if (!useKv) {
    try {
      await writeFile(cacheFile, JSON.stringify(cache, undefined, 2));
    } catch {
      /* ignore */
    }
  }
  return checked;
}
/**
 * Entry point. Parses CLI flags, loads words, generates domain hacks and
 * performs availability checks. Results are printed to stdout as one table
 * per TLD.
 *
 * TLDs given on the command line may be comma- or whitespace-separated and
 * may carry a leading dot or upper-case letters; they are normalised to bare
 * lower-case suffixes and de-duplicated before use. The numeric flags fall
 * back to their defaults when they are missing, non-numeric or zero.
 *
 * Each table lists available domains first, then domains whose status could
 * not be determined, then taken ones; within a group the lower score wins.
 */
async function main() {
  const { values: args } = parseArgs({
    args: Deno.args,
    allowNegative: true,
    allowPositionals: false,
    options: {
      tlds: { type: "string", default: ["rs", "st", "ax", "me", "ly", "ci", "ai", "am", "ws"], short: "t", multiple: true },
      wordlist: { type: "string", default: undefined, short: "w" },
      filterlist: { type: "string", default: undefined, short: "f" },
      topics: { type: "string", default: undefined, short: "k", multiple: true },
      offline: { type: "boolean", default: false, short: "o" },
      minLength: { type: "string", default: "3", short: "m" },
      maxLength: { type: "string", default: "12", short: "M" },
      limit: { type: "string", default: "100", short: "l" },
      cacheDays: { type: "string", default: "30", short: "c" },
    },
  });

  // Expand repeated and comma/whitespace-separated flag values into one list.
  const splitList = (values: string[]): string[] =>
    values.flatMap((v: string) => v.trim().split(/[,\s]+/)).filter((s) => s.trim());

  // Accept ".rs" / "RS" as spellings of "rs" and skip repeated TLDs so no
  // TLD is processed (and printed) twice.
  const tlds = [
    ...new Set(
      splitList(args.tlds)
        .map((t) => t.toLowerCase().replace(/^\.+/, ""))
        .filter(Boolean),
    ),
  ];

  const cfg: Config = {
    tlds,
    wordlist: args.wordlist,
    filterlist: args.filterlist,
    topics: args.topics ? splitList(args.topics).map((x: string) => x.toLowerCase()) : undefined,
    offline: !!args.offline,
    minLength: Number(args.minLength) || 3,
    maxLength: Number(args.maxLength) || 12,
    limit: Number(args.limit) || 100,
    cacheDays: Number(args.cacheDays) || 30,
  };
  if (cfg.tlds.length === 0) {
    console.error("You must specify at least one TLD via --tlds.");
    Deno.exit(1);
  }

  // Load filter list once
  const allowlist = await loadFilterList(cfg.filterlist);
  const ttlMs = cfg.cacheDays * 24 * 60 * 60 * 1000;

  // Sort key: available (0) < unknown (1) < taken (2). Ranking all three
  // states gives the comparator a consistent total order; returning 0 for
  // "taken vs unknown" while comparing scores inside each group does not.
  const rank = (r: CandidateResult): number =>
    r.available === true ? 0 : r.available === undefined ? 1 : 2;

  for (const tld of cfg.tlds) {
    console.log(`Generating domain hacks for .${tld} …`);
    const words = await loadCandidateWords(cfg, tld);
    console.log(` ✔︎ Retrieved ${words.length} words ending in ${tld}`);
    const candidates = generateCandidates(words, tld, cfg, allowlist);
    console.log(` ✔︎ Generated ${candidates.length} candidate hacks`);
    const results = await checkCandidates(candidates, ttlMs);
    results.sort((a, b) => rank(a) - rank(b) || a.score - b.score);
    if (results.length) {
      console.log("\nResults for ." + tld);
      console.log("========================\n");
      let header = "Domain".padEnd(40);
      header += " | ";
      header += "Status".padEnd(12);
      header += " | ";
      header += "Score";
      console.log("| " + header + " |");
      console.log("|-" + header.replace(/[^-|]/g, "-") + "-| ");
      for (const res of results) {
        const status = (res.available === undefined ? "unknown" : res.available ? "available" : "taken").padEnd(12);
        const domain = res.domain.padEnd(40, ".");
        const score = res.score.toFixed(2);
        console.log(`| ${domain} | ${status} | ${score} |`);
      }
    }
    console.log();
  }
}

// Run the CLI only when this file is executed directly, not when imported.
if (import.meta.main) await main();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment