nberlette · December 1, 2025 04:17
diff --git a/domain_hacks.ts b/domain_hacks.ts
 #!/usr/bin/env -S deno run -Aq --unstable-kv

 //! Domain Hack Generator and Availability Checker
 //! -----------------------------------------------------------------------
 //! Copyright (c) 2025 Nicholas Berlette. All rights reserved. MIT License.
 //! -----------------------------------------------------------------------

 // deno-lint-ignore-file no-explicit-any

 /**
 * @module domains
 *
 * Domain Hack Generator and Availability Checker
 *
 * This script compiles lists of potential domain “hacks” for a given set of
 * TLD or ccTLD extensions. A domain hack is created by splitting a word so
 * that the last characters of the word become the TLD. For example, the
 * English word "bakers" can form the hack `bake.rs` using the `.rs` TLD.
 *
 * The script draws candidate words from one of two sources:
 *   1. A local word list on disk (one word per line).
 *   2. The Merriam‑Webster Word Finder website, which lists common words
 *      ending in a particular suffix.  When using this source the script
 *      performs a simple HTML parse to extract the words on each page.
 *
 * In addition, a filter list or topics list can restrict the candidate
 * space to specific prefixes or words (e.g. tech‑related terms) to reduce
 * unnecessary network checks.
 *
 * Words are first pre‑filtered to weed out obvious junk: prefixes without
 * vowels, long consonant clusters, repeated characters and other unlikely
 * sequences are removed early on.  Candidate words can also be filtered to
 * include only those that contain certain topics or appear in a provided
 * whitelist.  Once candidates are selected, a heuristic scoring function
 * ranks them and the script checks availability via `whois` or RDAP.
 */

 import fs from "node:fs/promises";
 import { execFile } from "node:child_process";
 import { parseArgs } from "node:util";

 /** Read the file at `path` and resolve with its contents decoded as UTF-8 text. */
 async function readFile(path: string): Promise<string> {
  const contents = await fs.readFile(path, "utf8");
  return contents;
 }

 /**
 * Write `data` to the file at `path`, passing "utf-8" as the encoding
 * option to `fs.writeFile`.
 */
 async function writeFile(
  path: string,
  data: Parameters<typeof fs.writeFile>[1],
 ): Promise<void> {
  const encoding = "utf-8";
  await fs.writeFile(path, data, encoding);
 }

 /**
 * Configuration options parsed from the command line.
 */
 export interface Config {
  /** TLD suffixes to generate hacks for; `main` builds this from `--tlds`. */
  tlds: string[];
  /** Path to a newline-delimited word list; when set it is read instead of scraping. */
  wordlist?: string;
  /** Path to the filter-list file handed to `loadFilterList`. */
  filterlist?: string;
  /** Topic substrings; when absent or empty `filterWords` falls back to `DEFAULT_TOPICS`. */
  topics?: string[];
  /**
   * Minimum prefix length accepted by `filterWords`; also the first word
   * length requested from Merriam-Webster by `loadCandidateWords`.
   */
  minLength: number;
  /** Largest word length fetched and kept by `loadCandidateWords`. */
  maxLength: number;
  /** Maximum number of candidates `generateCandidates` returns per TLD. */
  limit: number;
  /** When true and no `wordlist` is given, no words are fetched (an error is logged). */
  offline: boolean;
  /** Cache lifetime in days; `main` converts it to milliseconds for `checkCandidates`. */
  cacheDays: number;
 }

 /**
 * Default set of topics used when the user does not supply any via --topics.
 * These terms bias the script toward tech-adjacent words (e.g. app, dev, api).
 *
 * `filterWords` tests each term with `prefix.includes(term)`, so every entry
 * is a substring match and each term only needs to appear once.
 */
 const DEFAULT_TOPICS = [
  "app", "dev", "tech", "code", "data", "net", "web", "api",
  "cloud", "crypto", "ai", "bio", "bot", "cyber", "hub", "blog",
  "tools", "kit", "script", "ops", "advocate", "engineer", "maker", "hack",
  "design", "ui", "ux", "ci", "cd", "vm", "os", "pipeline",
  "stack", "admin", "byte", "bit", "system", "network", "infra", "virtual",
  "machine", "digital", "turing", "turbo", "quantum", "qubit", "matrix", "vector",
  "tensor", "array", "loop", "function", "object", "class", "method", "property",
  "variable", "struct", "type", "union", "interface", "enum", "constant", "const",
  "static", "dynamic", "global", "local", "thread", "atomic", "parallel", "distro",
  "deploy", "buffer", "blob", "ssh", "ftp", "http", "https", "sql",
  "git", "repo", "commit", "branch", "merge", "vcs", "pr", "pull",
  "issue", "rebase", "channel", "stream", "packet",
 ];

 /**
 * Simple regex used to extract words from the Merriam-Webster Word Finder
 * pages.  The results on those pages appear within `<li><a class="badge">`.
 *
 * Each match exposes two named groups: `className` (the full value of the
 * anchor's `class` attribute) and `word` (the anchor's inner text, matched
 * lazily up to the first `</a>`).
 *
 * The pattern carries the `g` flag, so `exec` advances `lastIndex` on this
 * shared object; callers reset `lastIndex` before scanning a new document.
 */
 const WORD_REGEX = /<li\s*[^>]*>\s*<a\s+[^>]*class="(?<className>[^"]+)"[^>]*>(?<word>.+?)<\/a>/gi;

 /**
 * Fetch one page of words from the Merriam-Webster Word Finder.  If fetching
 * fails (network error or non-2xx status) a warning is logged and the words
 * collected so far (usually none) are returned.
 *
 * @param suffix The word ending (without leading dot).  For domain hacks this
 *        corresponds to the TLD.  It is URL-encoded before being placed in
 *        the request path.
 * @param length The number of letters in the full word.  The value is
 *        truncated to an integer and its sign dropped; 0 or a non-numeric
 *        value falls back to 6.
 * @param page The page index (1-based).  Additional pages may contain more
 *        results for longer suffixes.
 * @param common When true fetch common words only; otherwise fetch all words.
 * @returns Lower-cased, trimmed inner texts of every `badge` anchor found.
 */
 async function fetchWordsFromMerriam(
  suffix: string,
  length: number,
  page: number,
  common = true,
 ): Promise<string[]> {
  const words: string[] = [];
  const pathLength = (Math.abs(+length | 0) || 6).toString();
  const scope = common ? "common" : "all";
  const url =
    `https://www.merriam-webster.com/wordfinder/classic/ends/${scope}/${pathLength}/${encodeURIComponent(suffix)}/${page}`;
  try {
    const res = await fetch(url, {
      headers: {
        "User-Agent":
          "Mozilla/5.0 (compatible; DomainHack/1.0; +https://example.com)",
        "Accept":
          "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
      },
    });
    if (!res.ok) {
      console.warn(`Failed to fetch ${url}: ${res.status}`);
      // Release the unread body so the connection is not left dangling.
      await res.body?.cancel();
      return words;
    }
    const html = await res.text();
    // matchAll iterates over a private copy of the regex, which starts at the
    // shared object's lastIndex; reset it so the scan begins at offset 0.
    WORD_REGEX.lastIndex = 0;
    for (const match of html.matchAll(WORD_REGEX)) {
      const classList = (match.groups?.className ?? "").split(/\s+/);
      if (!classList.includes("badge")) continue;
      const word = match.groups?.word.trim().toLowerCase();
      if (word) words.push(word);
    }
  } catch (err) {
    console.warn(`Error fetching ${url}:`, err);
  }
  return words;
 }

 /**
 * Read a filter list from disk.  Each non-blank line is trimmed and
 * lower-cased; lines whose trimmed text starts with `#` are treated as
 * comments and skipped.  With no path an empty set is returned, and a read
 * failure is logged as a warning and also yields an empty set.
 */
 async function loadFilterList(path?: string): Promise<Set<string>> {
  const entries = new Set<string>();
  if (path) {
    try {
      const text = await readFile(path);
      text
        .split(/\r?\n/)
        .map((raw) => raw.trim().toLowerCase())
        .filter((entry) => entry !== "" && !entry.startsWith("#"))
        .forEach((entry) => entries.add(entry));
    } catch (err) {
      console.warn(`Failed to load filter list ${path}:`, err);
    }
  }
  return entries;
 }

 /**
 * Keep only the words that can plausibly form a readable domain hack for
 * `tld`.  A word survives when it ends with the lower-cased TLD and the
 * remaining prefix:
 *   - is at least `cfg.minLength` characters long,
 *   - consists solely of the letters a-z and contains a vowel,
 *   - has no run of four or more non-vowels, no character repeated three
 *     times in a row, and no `q` that is not followed by `u`,
 *   - is listed in `allowlist` (only enforced when the allowlist is non-empty),
 *   - contains at least one topic term (`cfg.topics`, or `DEFAULT_TOPICS`
 *     when none were configured).
 */
 function filterWords(
  words: string[],
  tld: string,
  cfg: Config,
  allowlist: Set<string>,
 ): string[] {
  const suffix = tld.toLowerCase();
  const topics = cfg.topics?.length ? cfg.topics : DEFAULT_TOPICS;
  const restrictToAllowlist = allowlist.size > 0;

  // Shape checks on the prefix alone.
  const looksReadable = (prefix: string): boolean =>
    /^[a-z]+$/.test(prefix) &&
    /[aeiou]/.test(prefix) &&
    !/[^aeiou]{4,}/.test(prefix) &&
    !/(.)\1\1/.test(prefix) &&
    !/q(?!u)/.test(prefix);

  return words.filter((word) => {
    if (word.length <= suffix.length || !word.endsWith(suffix)) return false;
    const prefix = word.slice(0, -suffix.length);
    if (prefix.length < cfg.minLength) return false;
    if (!looksReadable(prefix)) return false;
    if (restrictToAllowlist && !allowlist.has(prefix)) return false;
    return topics.length === 0 || topics.some((term) => prefix.includes(term));
  });
 }

 /**
 * Load words either from a local file (newline delimited) or, if no file is
 * provided, by scraping Merriam-Webster.  When scraping, word lengths from
 * `cfg.minLength` to `cfg.maxLength` are requested and up to three pages are
 * fetched per length; paging for a length stops at the first empty page.
 *
 * In offline mode without a word list an error is logged and an empty array
 * is returned.
 *
 * The TLD is normalised once (lower-cased, trimmed, one leading dot removed)
 * and that form is used both for the remote query and for the final filter.
 * Words are collected into a Set rather than spread into `push`, so a very
 * large word list cannot exceed the engine's argument-count limit.
 *
 * @returns Unique words that end with the normalised suffix and whose length
 *          is strictly greater than `cfg.minLength` and at most
 *          `cfg.maxLength` (capped at 100).
 */
 async function loadCandidateWords(cfg: Config, tld: string): Promise<string[]> {
  const suffix = tld.toLowerCase().trim().replace(/^\./, "");
  const minLen = Math.max(0, +cfg.minLength || 0);
  const maxLen = Math.min(100, +cfg.maxLength || 100);
  const unique = new Set<string>();

  if (cfg.wordlist) {
    const data = await readFile(cfg.wordlist);
    for (const line of data.split(/[\r\n]+/)) {
      const word = line.trim().normalize("NFKC").toLowerCase();
      if (word) unique.add(word);
    }
  } else if (cfg.offline) {
    console.error(
      `Offline mode set but no word list provided. Please supply --wordlist to use offline mode.`,
    );
    return [];
  } else {
    for (let len = cfg.minLength; len <= cfg.maxLength; len++) {
      for (let page = 1; page <= 3; page++) {
        const pageWords = await fetchWordsFromMerriam(suffix, len, page, true);
        if (pageWords.length === 0) break;
        for (const word of pageWords) unique.add(word);
      }
    }
  }

  return [...unique].filter(
    (w) => w.length > minLen && w.length <= maxLen && w.endsWith(suffix),
  );
 }

 /**
 * Heuristic desirability score for a `prefix` + `tld` pair; lower is better.
 *
 * The score starts at the prefix length, adds 2 for a run of three or more
 * non-vowels and 1 for a character repeated three times in a row, then adds
 * a per-letter weight (negative for common letters, positive for x/j/q/z).
 * It is reduced by 0.3 per vowel and by 0.5 when the prefix ends in one of
 * `aeiounlr`, and finally increased by half the TLD length.
 */
 export function scoreHack(prefix: string, tld: string): number {
  // Letters grouped by the weight each occurrence contributes.
  const tiers: Array<[number, string]> = [
    [-0.5, "etaoi"],
    [-0.4, "nsr"],
    [-0.3, "hl"],
    [-0.2, "dcumf"],
    [-0.1, "wgypbvk"],
    [0.5, "xjz"],
    [0.7, "q"],
  ];
  const weightOf = (letter: string): number =>
    tiers.find(([, letters]) => letters.includes(letter))?.[0] ?? 0;

  const hasConsonantRun = /[^aeiou]{3,}/.test(prefix);
  const hasTripleRepeat = /(.)\1\1/.test(prefix);
  const vowelCount = prefix.match(/[aeiou]/g)?.length ?? 0;
  const endsSoftly = /[aeiounlr]$/.test(prefix);

  let total = prefix.length;
  if (hasConsonantRun) total += 2;
  if (hasTripleRepeat) total += 1;
  for (const letter of prefix) total += weightOf(letter);
  total -= vowelCount * 0.3;
  if (endsSoftly) total -= 0.5;
  total += tld.length * 0.5;
  return total;
 }

 /**
 * Determine whether a domain appears to be available by running the `whois`
 * command.  Because output formats vary widely across TLDs, the lower-cased
 * output is searched for a handful of common "not registered" phrases.
 *
 * @param domain  Domain name passed to `whois` as its single argument.
 * @param timeout Milliseconds before the lookup is abandoned.  The value is
 *                given to `execFile` and also drives a backstop timer.
 * @returns `true` when one of the phrases is present, `false` when the
 *          command succeeded without any of them, and `undefined` when the
 *          command could not be started, failed, or timed out.
 */
 export function whois(domain: string, timeout = 10_000): Promise<boolean | undefined> {
  const availablePhrases = [
    "no match",
    "no data found",
    "no entries found",
    "domain not found",
    "is not registered",
    "registration status: available",
    "available for registration",
    "domain you requested is not",
  ];
  return new Promise((resolve) => {
    let timer: ReturnType<typeof setTimeout> | undefined;
    let proc: ReturnType<typeof execFile>;
    try {
      proc = execFile("whois", [domain], { timeout }, (err, stdout) => {
        // The lookup has finished: drop the backstop so it does not keep the
        // event loop alive for the rest of the timeout window.
        clearTimeout(timer);
        if (err) return resolve(undefined);
        const text = String(stdout).toLowerCase();
        resolve(availablePhrases.some((phrase) => text.includes(phrase)));
      });
    } catch {
      // Spawning can throw synchronously (e.g. invalid argument); treat that
      // as an inconclusive result rather than rejecting.
      return resolve(undefined);
    }

    timer = setTimeout(() => {
      try {
        proc.kill();
      } catch {
        /* ignore */
      } finally {
        resolve(undefined);
      }
    }, timeout);
  });
 }

 /**
 * Attempt to determine domain availability via RDAP.  The domain is
 * URL-encoded into the rdap.org lookup path.  Only the HTTP status is
 * inspected, so the response body is cancelled instead of being read.
 *
 * NOTE(review): a 404 is taken to mean "unregistered"; it may also be
 * returned when no RDAP server is known for the TLD — confirm against the
 * rdap.org documentation before trusting `true` for ccTLDs.
 *
 * @returns `true` on 404, `false` on any 2xx status, `undefined` for every
 *          other status or when the request itself fails.
 */
 export async function rdap(domain: string): Promise<boolean | undefined> {
  try {
    const res = await fetch(`https://rdap.org/domain/${encodeURIComponent(domain)}`);
    const verdict = res.status === 404 ? true : res.ok ? false : undefined;
    try {
      await res.body?.cancel();
    } catch {
      /* best effort: the verdict is already known */
    }
    return verdict;
  } catch {
    return undefined;
  }
 }
 /**
 * Look the domain up through the DomainsDB search endpoint.
 *
 * @returns `false` when the response's `domains` array contains an entry
 *          whose `domain` equals the requested name (case-insensitively),
 *          `true` when the response is OK but holds no such entry, and
 *          `undefined` when the response is not OK or the request or JSON
 *          parsing throws.
 */
 async function domainsDB(domain: string): Promise<boolean | undefined> {
  try {
    const endpoint = "https://api.domainsdb.info/v1/domains/search?domain=" +
      encodeURIComponent(domain);
    const res = await fetch(endpoint);
    if (!res.ok) return undefined;
    const payload = await res.json() as any;
    const records = payload?.domains;
    if (!Array.isArray(records)) return true;
    const wanted = domain.toLowerCase();
    return !records.some(
      (record: { domain: string | null }) => record.domain?.toLowerCase() === wanted,
    );
  } catch {
    return undefined;
  }
 }

 /**
 * Check whether a domain name appears to be available.  The probes are
 * tried in order — local `whois`, then RDAP, then DomainsDB — and the first
 * one that produces a definite answer wins.  A return value of `undefined`
 * means none of them could determine the availability.
 */
 export async function isDomainAvailable(domain: string): Promise<boolean | undefined> {
  const probes = [whois, rdap, domainsDB];
  for (const probe of probes) {
    const verdict = await probe(domain);
    if (verdict != null) return verdict;
  }
  return undefined;
 }

 /**
 * Structure returned when checking a candidate domain.
 */
 export interface CandidateResult {
  /** Full domain name, built as `${prefix}.${tld}`. */
  domain: string;
  /** The source word with the TLD suffix sliced off the end. */
  prefix: string;
  /** Lower-cased TLD that the word ended with. */
  tld: string;
  /** Value from `scoreHack`; lower scores sort first. */
  score: number;
  /** `true`/`false` once checked; `undefined` before checking or when inconclusive. */
  available: boolean | undefined;
 }

 /**
 * Generate domain hacks for a given TLD from the supplied word list.  Words
 * are pre-filtered by `filterWords`, scored by `scoreHack`, sorted by
 * ascending score and trimmed to the first `cfg.limit` entries.
 *
 * The TLD is normalised the same way `loadCandidateWords` does it
 * (lower-cased, trimmed, one leading dot removed), so `.rs` and `rs` give
 * identical results and never produce a `name..rs` domain.  An empty TLD
 * yields no candidates.
 *
 * @returns Candidates with `available` left `undefined`.
 */
 export function generateCandidates(
  words: string[],
  tld: string,
  cfg: Config,
  allowlist: Set<string>,
 ): CandidateResult[] {
  const suffix = tld.toLowerCase().trim().replace(/^\./, "");
  if (!suffix) return [];
  const results = filterWords(words, suffix, cfg, allowlist).map(
    (word): CandidateResult => {
      const prefix = word.slice(0, -suffix.length);
      return {
        domain: `${prefix}.${suffix}`,
        prefix,
        tld: suffix,
        score: scoreHack(prefix, suffix),
        available: undefined,
      };
    },
  );
  results.sort((a, b) => a.score - b.score);
  return results.slice(0, cfg.limit);
 }

 /**
 * Perform availability checks on a list of candidate domains, five at a
 * time, so WHOIS servers and remote APIs are not flooded.
 *
 * Verdicts are cached in Deno KV when `Deno.openKv` exists, otherwise in
 * `.domainhack_cache.json` in the working directory.  A cached boolean
 * younger than `ttlMs` is reused; anything else triggers a fresh
 * `isDomainAvailable` lookup.  Only definite (boolean) verdicts are stored,
 * since inconclusive ones are never served from the cache anyway.
 *
 * The KV handle is closed before returning, even when a lookup throws.
 * Failing to write the JSON cache is logged as a warning and otherwise ignored.
 *
 * @param candidates Candidates to check; each object's `available` field is
 *                   updated in place.
 * @param ttlMs      Maximum age of a cache entry in milliseconds; also used
 *                   as the KV expiry.
 * @returns A new array holding the same candidate objects in input order.
 */
 export async function checkCandidates(
  candidates: CandidateResult[],
  ttlMs: number,
 ): Promise<CandidateResult[]> {
  type CacheEntry = { available: boolean | undefined; checkedAt: number };
  const useKv = typeof Deno !== "undefined" && typeof Deno.openKv === "function";
  const cacheFile = ".domainhack_cache.json";
  const concurrency = 5;
  let kv: any = undefined;
  let cache: Record<string, CacheEntry> = {};
  if (useKv) {
    kv = await Deno.openKv();
  } else {
    try {
      const parsed: unknown = JSON.parse(await readFile(cacheFile));
      // Guard against a cache file that holds null, an array or a scalar.
      if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
        cache = parsed as Record<string, CacheEntry>;
      }
    } catch {
      cache = {};
    }
  }

  /** Return a still-fresh cached verdict for `domain`, or undefined on a miss. */
  async function readCached(domain: string): Promise<boolean | undefined> {
    const entry: CacheEntry | null | undefined = useKv
      ? (await kv.get(["availability", domain]))?.value
      : cache[domain];
    if (!entry) return undefined;
    if (typeof entry.available !== "boolean") return undefined;
    if (typeof entry.checkedAt !== "number") return undefined;
    return Date.now() - entry.checkedAt < ttlMs ? entry.available : undefined;
  }

  /** Persist a definite verdict for `domain` in whichever cache is active. */
  async function writeCached(domain: string, available: boolean | undefined): Promise<void> {
    if (typeof available !== "boolean") return;
    const entry: CacheEntry = { available, checkedAt: Date.now() };
    if (useKv) {
      await kv.set(["availability", domain], entry, { expireIn: ttlMs });
    } else {
      cache[domain] = entry;
    }
  }

  // Shared cursor: each worker claims the next unchecked candidate.
  let next = 0;
  async function worker(): Promise<void> {
    while (next < candidates.length) {
      const candidate = candidates[next++];
      let available = await readCached(candidate.domain);
      if (available === undefined) {
        available = await isDomainAvailable(candidate.domain);
        await writeCached(candidate.domain, available);
      }
      candidate.available = available;
    }
  }

  try {
    await Promise.all(Array.from({ length: concurrency }, () => worker()));
  } finally {
    if (useKv) kv.close();
  }
  if (!useKv) {
    try {
      await writeFile(cacheFile, JSON.stringify(cache, undefined, 2));
    } catch (err) {
      console.warn(`Failed to write ${cacheFile}:`, err);
    }
  }
  return [...candidates];
 }

 /**
 * Entry point.  Parses CLI flags, loads words, generates domain hacks and
 * performs availability checks.  Results are printed to stdout as one table
 * per TLD.
 *
 * TLDs given via `--tlds` may be comma or whitespace separated; they are
 * lower-cased, stripped of leading dots and de-duplicated.  Numeric flags
 * that are missing, zero or unparsable fall back to their defaults.
 *
 * Rows are ordered available first, then unknown, then taken, with ties
 * broken by ascending score.
 */
 async function main() {
  const { values: args } = parseArgs({
    args: Deno.args,
    allowNegative: true,
    allowPositionals: false,
    options: {
      tlds: { type: "string", default: ["rs", "st", "ax", "me", "ly", "ci", "ai", "am", "ws"], short: "t", multiple: true },
      wordlist: { type: "string", default: undefined, short: "w" },
      filterlist: { type: "string", default: undefined, short: "f" },
      topics: { type: "string", default: undefined, short: "k", multiple: true },
      offline: { type: "boolean", default: false, short: "o" },
      minLength: { type: "string", default: "3", short: "m" },
      maxLength: { type: "string", default: "12", short: "M" },
      limit: { type: "string", default: "100", short: "l" },
      cacheDays: { type: "string", default: "30", short: "c" },
    },
  });
  // Normalise here so every later stage sees the same dot-less suffix.
  const requestedTlds: string[] = args.tlds
    .flatMap((t: string) => t.trim().split(/[,\s]+/))
    .map((t: string) => t.toLowerCase().replace(/^\.+/, ""))
    .filter(Boolean);
  const cfg: Config = {
    tlds: [...new Set<string>(requestedTlds)],
    wordlist: args.wordlist,
    filterlist: args.filterlist,
    topics: args.topics ? args.topics.flatMap((k: string) => k.trim().split(/[\s,]+/).filter(Boolean)).map((x: string) => x.toLowerCase()) : undefined,
    offline: !!args.offline,
    minLength: Number(args.minLength) || 3,
    maxLength: Number(args.maxLength) || 12,
    limit: Number(args.limit) || 100,
    cacheDays: Number(args.cacheDays) || 30,
  };
  if (cfg.tlds.length === 0) {
    console.error("You must specify at least one TLD via --tlds.");
    Deno.exit(1);
  }
  // Load filter list once
  const allowlist = await loadFilterList(cfg.filterlist);
  const ttlMs = cfg.cacheDays * 24 * 60 * 60 * 1000;
  // Total order over availability, so the comparator below stays consistent.
  const rank = (r: CandidateResult): number =>
    r.available === true ? 0 : r.available === undefined ? 1 : 2;
  for (const tld of cfg.tlds) {
    console.log(`Generating domain hacks for .${tld} …`);
    const words = await loadCandidateWords(cfg, tld);
    console.log(`  ✔︎ Retrieved ${words.length} words ending in ${tld}`);
    const candidates = generateCandidates(words, tld, cfg, allowlist);
    console.log(`  ✔︎ Generated ${candidates.length} candidate hacks`);
    const results = await checkCandidates(candidates, ttlMs);
    results.sort((a, b) => rank(a) - rank(b) || a.score - b.score);
    if (results.length) {
      console.log("\nResults for ." + tld);
      console.log("========================\n");
      const header = ["Domain".padEnd(40), "Status".padEnd(12), "Score"].join(" | ");
      console.log("| " + header + " |");
      console.log("|-" + header.replace(/[^-|]/g, "-") + "-| ");
      for (const res of results) {
        const status = (res.available === undefined ? "unknown" : res.available ? "available" : "taken").padEnd(12);
        const domain = res.domain.padEnd(40, ".");
        const score = res.score.toFixed(2);
        console.log(`| ${domain} | ${status} | ${score} |`);
      }
    }
    console.log();
  }
 }

 // Run the CLI only when `import.meta.main` is truthy, so importing this
 // file for its exports does not start a run.
 if (import.meta.main) await main();
	#!/usr/bin/env -S deno run -Aq --unstable-kv

	//! Domain Hack Generator and Availability Checker
	//! -----------------------------------------------------------------------
	//! Copyright (c) 2025 Nicholas Berlette. All rights reserved. MIT License.
	//! -----------------------------------------------------------------------

	// deno-lint-ignore-file no-explicit-any

	/**
	* @module domains
	*
	* Domain Hack Generator and Availability Checker
	*
	* This script compiles lists of potential domain “hacks” for a given set of
	* TLD or ccTLD extensions. A domain hack is created by splitting a word so
	* that the last characters of the word become the TLD. For example, the
	* English word "bakers" can form the hack `bake.rs` using the `.rs` TLD.
	*
	* The script supports three sources of candidate words:
	* 1. A local word list on disk (one word per line).
	* 2. The Merriam‑Webster Word Finder website, which lists common words
	* ending in a particular suffix. When using this source the script
	* performs a simple HTML parse to extract the words on each page.
	* 3. A filter list or topics list that can restrict the candidate space to
	* specific prefixes or words (e.g. tech‑related terms) to reduce
	* unnecessary network checks.
	*
	* Words are first pre‑filtered to weed out obvious junk: prefixes without
	* vowels, long consonant clusters, repeated characters and other unlikely
	* sequences are removed early on. Candidate words can also be filtered to
	* include only those that contain certain topics or appear in a provided
	* whitelist. Once candidates are selected, a heuristic scoring function
	* ranks them and the script checks availability via `whois` or RDAP.
	*/

	import fs from "node:fs/promises";
	import { execFile } from "node:child_process";
	import { parseArgs } from "node:util";

	/** Read the file at `path` and resolve with its contents decoded as UTF-8 text. */
	async function readFile(path: string): Promise<string> {
	  const contents = await fs.readFile(path, "utf8");
	  return contents;
	}

	/**
	* Write `data` to the file at `path`, passing "utf-8" as the encoding
	* option to `fs.writeFile`.
	*/
	async function writeFile(
	  path: string,
	  data: Parameters<typeof fs.writeFile>[1],
	): Promise<void> {
	  const encoding = "utf-8";
	  await fs.writeFile(path, data, encoding);
	}

	/**
	* Configuration options parsed from the command line.
	*/
	export interface Config {
	/** TLD suffixes to generate hacks for. */
	tlds: string[];
	/** Path to a newline-delimited word list; when set it is read instead of scraping. */
	wordlist?: string;
	/** Path to the filter-list file handed to `loadFilterList`. */
	filterlist?: string;
	/** Topic substrings; when absent or empty `filterWords` falls back to `DEFAULT_TOPICS`. */
	topics?: string[];
	/** Minimum prefix length accepted by `filterWords`; also the first word length fetched. */
	minLength: number;
	/** Largest word length fetched and kept by `loadCandidateWords`. */
	maxLength: number;
	/** Maximum number of candidates kept per TLD. */
	limit: number;
	/** When true and no `wordlist` is given, no words are fetched (an error is logged). */
	offline: boolean;
	/** Cache lifetime in days. */
	cacheDays: number;
	}

	/**
	* Default set of topics used when the user does not supply any via --topics.
	* These terms bias the script toward tech-adjacent words (e.g. app, dev, api).
	*
	* `filterWords` tests each term with `prefix.includes(term)`, so every entry
	* is a substring match and each term only needs to appear once.
	*/
	const DEFAULT_TOPICS = [
	  "app", "dev", "tech", "code", "data", "net", "web", "api",
	  "cloud", "crypto", "ai", "bio", "bot", "cyber", "hub", "blog",
	  "tools", "kit", "script", "ops", "advocate", "engineer", "maker", "hack",
	  "design", "ui", "ux", "ci", "cd", "vm", "os", "pipeline",
	  "stack", "admin", "byte", "bit", "system", "network", "infra", "virtual",
	  "machine", "digital", "turing", "turbo", "quantum", "qubit", "matrix", "vector",
	  "tensor", "array", "loop", "function", "object", "class", "method", "property",
	  "variable", "struct", "type", "union", "interface", "enum", "constant", "const",
	  "static", "dynamic", "global", "local", "thread", "atomic", "parallel", "distro",
	  "deploy", "buffer", "blob", "ssh", "ftp", "http", "https", "sql",
	  "git", "repo", "commit", "branch", "merge", "vcs", "pr", "pull",
	  "issue", "rebase", "channel", "stream", "packet",
	];

	/**
	* Simple regex used to extract words from the Merriam-Webster Word Finder
	* pages.  The results on those pages appear within `<li><a class="badge">`.
	*
	* Each match exposes two named groups: `className` (the full value of the
	* anchor's `class` attribute) and `word` (the anchor's inner text, matched
	* lazily up to the first `</a>`).  The `*` quantifiers make whitespace and
	* extra attributes optional, so a bare `<li><a class="…">` also matches.
	*
	* The pattern carries the `g` flag, so `exec` advances `lastIndex` on this
	* shared object; callers reset `lastIndex` before scanning a new document.
	*/
	const WORD_REGEX = /<li\s*[^>]*>\s*<a\s+[^>]*class="(?<className>[^"]+)"[^>]*>(?<word>.+?)<\/a>/gi;

	/**
	* Fetch one page of words from the Merriam-Webster Word Finder.  If fetching
	* fails (network error or non-2xx status) a warning is logged and the words
	* collected so far (usually none) are returned.
	*
	* @param suffix The word ending (without leading dot).  For domain hacks this
	*        corresponds to the TLD.  It is URL-encoded before being placed in
	*        the request path.
	* @param length The number of letters in the full word.  The value is
	*        truncated to an integer and its sign dropped; 0 or a non-numeric
	*        value falls back to 6.
	* @param page The page index (1-based).  Additional pages may contain more
	*        results for longer suffixes.
	* @param common When true fetch common words only; otherwise fetch all words.
	* @returns Lower-cased, trimmed inner texts of every `badge` anchor found.
	*/
	async function fetchWordsFromMerriam(
	  suffix: string,
	  length: number,
	  page: number,
	  common = true,
	): Promise<string[]> {
	  const words: string[] = [];
	  const pathLength = (Math.abs(+length | 0) || 6).toString();
	  const scope = common ? "common" : "all";
	  const url =
	    `https://www.merriam-webster.com/wordfinder/classic/ends/${scope}/${pathLength}/${encodeURIComponent(suffix)}/${page}`;
	  try {
	    const res = await fetch(url, {
	      headers: {
	        "User-Agent":
	          "Mozilla/5.0 (compatible; DomainHack/1.0; +https://example.com)",
	        "Accept":
	          "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
	        "Accept-Language": "en-US,en;q=0.9",
	      },
	    });
	    if (!res.ok) {
	      console.warn(`Failed to fetch ${url}: ${res.status}`);
	      // Release the unread body so the connection is not left dangling.
	      await res.body?.cancel();
	      return words;
	    }
	    const html = await res.text();
	    // matchAll iterates over a private copy of the regex, which starts at the
	    // shared object's lastIndex; reset it so the scan begins at offset 0.
	    WORD_REGEX.lastIndex = 0;
	    for (const match of html.matchAll(WORD_REGEX)) {
	      const classList = (match.groups?.className ?? "").split(/\s+/);
	      if (!classList.includes("badge")) continue;
	      const word = match.groups?.word.trim().toLowerCase();
	      if (word) words.push(word);
	    }
	  } catch (err) {
	    console.warn(`Error fetching ${url}:`, err);
	  }
	  return words;
	}

	/**
	* Read a filter list from disk.  Each non-blank line is trimmed and
	* lower-cased; lines whose trimmed text starts with `#` are treated as
	* comments and skipped.  With no path an empty set is returned, and a read
	* failure is logged as a warning and also yields an empty set.
	*/
	async function loadFilterList(path?: string): Promise<Set<string>> {
	  const entries = new Set<string>();
	  if (path) {
	    try {
	      const text = await readFile(path);
	      text
	        .split(/\r?\n/)
	        .map((raw) => raw.trim().toLowerCase())
	        .filter((entry) => entry !== "" && !entry.startsWith("#"))
	        .forEach((entry) => entries.add(entry));
	    } catch (err) {
	      console.warn(`Failed to load filter list ${path}:`, err);
	    }
	  }
	  return entries;
	}

	/**
	* Keep only the words that can plausibly form a readable domain hack for
	* `tld`.  A word survives when it ends with the lower-cased TLD and the
	* remaining prefix:
	*   - is at least `cfg.minLength` characters long,
	*   - consists solely of the letters a-z and contains a vowel,
	*   - has no run of four or more non-vowels, no character repeated three
	*     times in a row, and no `q` that is not followed by `u`,
	*   - is listed in `allowlist` (only enforced when the allowlist is non-empty),
	*   - contains at least one topic term (`cfg.topics`, or `DEFAULT_TOPICS`
	*     when none were configured).
	*/
	function filterWords(
	  words: string[],
	  tld: string,
	  cfg: Config,
	  allowlist: Set<string>,
	): string[] {
	  const suffix = tld.toLowerCase();
	  const topics = cfg.topics?.length ? cfg.topics : DEFAULT_TOPICS;
	  const restrictToAllowlist = allowlist.size > 0;

	  // Shape checks on the prefix alone.
	  const looksReadable = (prefix: string): boolean =>
	    /^[a-z]+$/.test(prefix) &&
	    /[aeiou]/.test(prefix) &&
	    !/[^aeiou]{4,}/.test(prefix) &&
	    !/(.)\1\1/.test(prefix) &&
	    !/q(?!u)/.test(prefix);

	  return words.filter((word) => {
	    if (word.length <= suffix.length || !word.endsWith(suffix)) return false;
	    const prefix = word.slice(0, -suffix.length);
	    if (prefix.length < cfg.minLength) return false;
	    if (!looksReadable(prefix)) return false;
	    if (restrictToAllowlist && !allowlist.has(prefix)) return false;
	    return topics.length === 0 || topics.some((term) => prefix.includes(term));
	  });
	}

	/**
	* Load words either from a local file (newline delimited) or, if no file is
	* provided, by scraping Merriam-Webster.  When scraping, word lengths from
	* `cfg.minLength` to `cfg.maxLength` are requested and up to three pages are
	* fetched per length; paging for a length stops at the first empty page.
	*
	* In offline mode without a word list an error is logged and an empty array
	* is returned.
	*
	* The TLD is normalised once (lower-cased, trimmed, one leading dot removed)
	* and that form is used both for the remote query and for the final filter.
	* Words are collected into a Set rather than spread into `push`, so a very
	* large word list cannot exceed the engine's argument-count limit.
	*
	* @returns Unique words that end with the normalised suffix and whose length
	*          is strictly greater than `cfg.minLength` and at most
	*          `cfg.maxLength` (capped at 100).
	*/
	async function loadCandidateWords(cfg: Config, tld: string): Promise<string[]> {
	  const suffix = tld.toLowerCase().trim().replace(/^\./, "");
	  const minLen = Math.max(0, +cfg.minLength || 0);
	  const maxLen = Math.min(100, +cfg.maxLength || 100);
	  const unique = new Set<string>();

	  if (cfg.wordlist) {
	    const data = await readFile(cfg.wordlist);
	    for (const line of data.split(/[\r\n]+/)) {
	      const word = line.trim().normalize("NFKC").toLowerCase();
	      if (word) unique.add(word);
	    }
	  } else if (cfg.offline) {
	    console.error(
	      `Offline mode set but no word list provided. Please supply --wordlist to use offline mode.`,
	    );
	    return [];
	  } else {
	    for (let len = cfg.minLength; len <= cfg.maxLength; len++) {
	      for (let page = 1; page <= 3; page++) {
	        const pageWords = await fetchWordsFromMerriam(suffix, len, page, true);
	        if (pageWords.length === 0) break;
	        for (const word of pageWords) unique.add(word);
	      }
	    }
	  }

	  return [...unique].filter(
	    (w) => w.length > minLen && w.length <= maxLen && w.endsWith(suffix),
	  );
	}

	/**
	* Heuristic desirability score for a `prefix` + `tld` pair; lower is better.
	*
	* The score starts at the prefix length, adds 2 for a run of three or more
	* non-vowels and 1 for a character repeated three times in a row, then adds
	* a per-letter weight (negative for common letters, positive for x/j/q/z).
	* It is reduced by 0.3 per vowel and by 0.5 when the prefix ends in one of
	* `aeiounlr`, and finally increased by half the TLD length.
	*/
	export function scoreHack(prefix: string, tld: string): number {
	  const letterPenalty: Record<string, number> = {
	    e: -0.5, t: -0.5, a: -0.5, o: -0.5, i: -0.5, n: -0.4,
	    s: -0.4, r: -0.4, h: -0.3, l: -0.3,
	    d: -0.2, c: -0.2, u: -0.2, m: -0.2, f: -0.2,
	    w: -0.1, g: -0.1, y: -0.1, p: -0.1, b: -0.1,
	    v: -0.1, k: -0.1,
	    x: 0.5, j: 0.5, q: 0.7, z: 0.5,
	  };
	  let score = prefix.length;
	  if (/[^aeiou]{3,}/.test(prefix)) score += 2;
	  if (/(.)\1\1/.test(prefix)) score += 1;
	  for (const ch of prefix) {
	    score += letterPenalty[ch] ?? 0;
	  }
	  // `match` returns null when there are no vowels at all.
	  const vowels = prefix.match(/[aeiou]/g)?.length ?? 0;
	  score -= vowels * 0.3;
	  if (/[aeiounlr]$/.test(prefix)) score -= 0.5;
	  score += tld.length * 0.5;
	  return score;
	}

	/**
	* Determine whether a domain appears to be available by running the `whois`
	* command.  Because output formats vary widely across TLDs, the lower-cased
	* output is searched for a handful of common "not registered" phrases.
	*
	* @param domain  Domain name passed to `whois` as its single argument.
	* @param timeout Milliseconds before the lookup is abandoned.  The value is
	*                given to `execFile` and also drives a backstop timer.
	* @returns `true` when one of the phrases is present, `false` when the
	*          command succeeded without any of them, and `undefined` when the
	*          command could not be started, failed, or timed out.
	*/
	export function whois(domain: string, timeout = 10_000): Promise<boolean | undefined> {
	  const availablePhrases = [
	    "no match",
	    "no data found",
	    "no entries found",
	    "domain not found",
	    "is not registered",
	    "registration status: available",
	    "available for registration",
	    "domain you requested is not",
	  ];
	  return new Promise((resolve) => {
	    let timer: ReturnType<typeof setTimeout> | undefined;
	    let proc: ReturnType<typeof execFile>;
	    try {
	      proc = execFile("whois", [domain], { timeout }, (err, stdout) => {
	        // The lookup has finished: drop the backstop so it does not keep the
	        // event loop alive for the rest of the timeout window.
	        clearTimeout(timer);
	        if (err) return resolve(undefined);
	        const text = String(stdout).toLowerCase();
	        resolve(availablePhrases.some((phrase) => text.includes(phrase)));
	      });
	    } catch {
	      // Spawning can throw synchronously (e.g. invalid argument); treat that
	      // as an inconclusive result rather than rejecting.
	      return resolve(undefined);
	    }

	    timer = setTimeout(() => {
	      try {
	        proc.kill();
	      } catch {
	        /* ignore */
	      } finally {
	        resolve(undefined);
	      }
	    }, timeout);
	  });
	}

	/**
	* Attempt to determine domain availability via RDAP.  The domain is
	* URL-encoded into the rdap.org lookup path.  Only the HTTP status is
	* inspected, so the response body is cancelled instead of being read.
	*
	* NOTE(review): a 404 is taken to mean "unregistered"; it may also be
	* returned when no RDAP server is known for the TLD — confirm against the
	* rdap.org documentation before trusting `true` for ccTLDs.
	*
	* @returns `true` on 404, `false` on any 2xx status, `undefined` for every
	*          other status or when the request itself fails.
	*/
	export async function rdap(domain: string): Promise<boolean | undefined> {
	  try {
	    const res = await fetch(`https://rdap.org/domain/${encodeURIComponent(domain)}`);
	    const verdict = res.status === 404 ? true : res.ok ? false : undefined;
	    try {
	      await res.body?.cancel();
	    } catch {
	      /* best effort: the verdict is already known */
	    }
	    return verdict;
	  } catch {
	    return undefined;
	  }
	}
	/**
	 * Look the domain up in the DomainsDB search API. The domain counts as
	 * registered when the response's `domains` array contains an entry whose
	 * `domain` field equals the requested name (case-insensitive).
	 *
	 * @param domain Domain name to look up.
	 * @returns `false` if a matching entry is found, `true` if the response is
	 * OK and contains no match, `undefined` on a non-OK response or any error.
	 */
	async function domainsDB(domain: string): Promise<boolean | undefined> {
	  try {
	    const url = `https://api.domainsdb.info/v1/domains/search?domain=${encodeURIComponent(domain)}`;
	    const res = await fetch(url);
	    if (!res.ok) {
	      // Release the unread body before giving up on this source.
	      await res.body?.cancel().catch(() => {});
	      return undefined;
	    }
	    const data = await res.json() as any;
	    const wanted = domain.toLowerCase();
	    // Skip malformed entries instead of letting one bad element discard the
	    // whole response.
	    const found = Array.isArray(data?.domains) &&
	      data.domains.some((entry: { domain?: unknown } | null) => {
	        const name = entry?.domain;
	        return typeof name === "string" && name.toLowerCase() === wanted;
	      });
	    return !found;
	  } catch {
	    /* ignore */
	  }
	  return undefined;
	}

	/**
	 * Check whether a domain name appears to be available. Sources are tried in
	 * order: the local `whois` command, then RDAP, then DomainsDB. The first
	 * source that gives a definite answer wins; later sources are not queried.
	 *
	 * @param domain Domain name to check.
	 * @returns `true` or `false` from the first conclusive source, or
	 * `undefined` when none of them could determine availability.
	 */
	export async function isDomainAvailable(domain: string): Promise<boolean | undefined> {
	  for (const probe of [whois, rdap, domainsDB]) {
	    const verdict = await probe(domain);
	    // `false` is a definite answer too, so only `undefined` falls through.
	    if (verdict !== undefined) return verdict;
	  }
	  return undefined;
	}

	/**
	 * Structure returned when checking a candidate domain.
	 */
	export interface CandidateResult {
	  /** Full domain hack in `prefix.tld` form. */
	  domain: string;
	  /** Part of the source word that precedes the TLD. */
	  prefix: string;
	  /** TLD the word was split on. */
	  tld: string;
	  /** Heuristic score; candidates are ranked in ascending score order. */
	  score: number;
	  /**
	   * `true` if the domain appears available, `false` if it appears taken,
	   * `undefined` if it has not been checked or the check was inconclusive.
	   */
	  available: boolean | undefined;
	}

	/**
	 * Build the ranked list of domain hacks for one TLD. The word list is passed
	 * through `filterWords`; each surviving word is split into a prefix and the
	 * lower-cased TLD, scored with `scoreHack`, and the results are returned in
	 * ascending score order, truncated to `cfg.limit` entries.
	 *
	 * @param words Candidate words to consider.
	 * @param tld TLD to build hacks for.
	 * @param cfg Run configuration (supplies the filters and the result limit).
	 * @param allowlist Words permitted by the filter list.
	 * @returns Unchecked candidates (`available` is `undefined`), best first.
	 */
	export function generateCandidates(
	  words: string[],
	  tld: string,
	  cfg: Config,
	  allowlist: Set<string>,
	): CandidateResult[] {
	  const survivors = filterWords(words, tld, cfg, allowlist);
	  const suffix = tld.toLowerCase();
	  const hacks = Array.from(survivors, (word: string): CandidateResult => {
	    // Drop the trailing TLD characters to obtain the part before the dot.
	    const prefix = word.slice(0, -suffix.length);
	    return {
	      domain: `${prefix}.${suffix}`,
	      prefix,
	      tld: suffix,
	      score: scoreHack(prefix, suffix),
	      available: undefined,
	    };
	  });
	  hacks.sort((left, right) => left.score - right.score);
	  return hacks.slice(0, cfg.limit);
	}

	/**
	 * Perform availability checks on a list of candidate domains, using a cache
	 * to avoid repeating recent lookups. When `Deno.openKv` exists the cache
	 * lives in Deno KV (entries expire after `ttlMs`); otherwise it is a JSON
	 * file named `.domainhack_cache.json` that is read at the start and rewritten
	 * at the end. Only definite answers are cached; inconclusive results are
	 * always retried on the next run.
	 *
	 * Each candidate's `available` field is updated in place.
	 *
	 * @param candidates Candidates to check.
	 * @param ttlMs Maximum age in milliseconds for a cached answer to be reused.
	 * @param concurrency Number of lookups allowed to run at the same time.
	 * @returns The same candidate objects, in the order their checks completed.
	 */
	export async function checkCandidates(
	  candidates: CandidateResult[],
	  ttlMs: number,
	  concurrency = 5,
	): Promise<CandidateResult[]> {
	  type CacheEntry = { available: boolean | undefined; checkedAt: number };
	  const cacheFile = ".domainhack_cache.json";
	  const useKv = typeof Deno !== "undefined" && typeof Deno.openKv === "function";
	  const kv: any = useKv ? await Deno.openKv() : undefined;
	  let cache: Record<string, CacheEntry> = {};
	  if (!useKv) {
	    try {
	      const parsed: unknown = JSON.parse(await readFile(cacheFile));
	      // A cache file holding `null`, an array or a primitive would otherwise
	      // crash every lookup below.
	      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
	        cache = parsed as Record<string, CacheEntry>;
	      }
	    } catch {
	      /* missing or unreadable cache file: start empty */
	    }
	  }
	  // Return the cached answer for a domain if it is definite and still fresh.
	  const recall = async (domain: string): Promise<boolean | undefined> => {
	    const entry: any = useKv ? (await kv.get(["availability", domain]))?.value : cache[domain];
	    const usable = entry &&
	      typeof entry.available === "boolean" &&
	      typeof entry.checkedAt === "number" &&
	      Date.now() - entry.checkedAt < ttlMs;
	    return usable ? entry.available : undefined;
	  };
	  // Record a definite answer in whichever cache backend is active.
	  const remember = async (domain: string, available: boolean): Promise<void> => {
	    const entry: CacheEntry = { available, checkedAt: Date.now() };
	    if (useKv) {
	      await kv.set(["availability", domain], entry, { expireIn: ttlMs });
	    } else {
	      cache[domain] = entry;
	    }
	  };
	  const checked: CandidateResult[] = [];
	  let next = 0;
	  // Workers pull from a shared cursor; there is no await between the bounds
	  // check and the increment, so each candidate is claimed exactly once.
	  const worker = async (): Promise<void> => {
	    while (next < candidates.length) {
	      const candidate = candidates[next++];
	      let available = await recall(candidate.domain);
	      if (available === undefined) {
	        available = await isDomainAvailable(candidate.domain);
	        if (available !== undefined) await remember(candidate.domain, available);
	      }
	      candidate.available = available;
	      checked.push(candidate);
	    }
	  };
	  try {
	    const poolSize = Math.max(1, Math.min(Math.floor(concurrency) || 1, candidates.length));
	    await Promise.all(Array.from({ length: poolSize }, worker));
	    if (!useKv) {
	      try {
	        await writeFile(cacheFile, JSON.stringify(cache, undefined, 2));
	      } catch {
	        /* ignore: the cache is best-effort */
	      }
	    }
	  } finally {
	    // Release the KV handle; this function opens a new one on every call.
	    kv?.close();
	  }
	  return checked;
	}

	/**
	 * Entry point. Parses CLI flags, loads the filter list once, then for each
	 * TLD loads candidate words, generates domain hacks, checks availability and
	 * prints a results table to stdout. Rows are ordered available first, then
	 * unknown, then taken, with ascending score inside each group.
	 *
	 * Exits with status 1 when no TLD is given.
	 */
	async function main() {
	  const { values: args } = parseArgs({
	    args: Deno.args,
	    allowNegative: true,
	    allowPositionals: false,
	    options: {
	      tlds: { type: "string", default: ["rs", "st", "ax", "me", "ly", "ci", "ai", "am", "ws"], short: "t", multiple: true },
	      wordlist: { type: "string", default: undefined, short: "w" },
	      filterlist: { type: "string", default: undefined, short: "f" },
	      topics: { type: "string", default: undefined, short: "k", multiple: true },
	      offline: { type: "boolean", default: false, short: "o" },
	      minLength: { type: "string", default: "3", short: "m" },
	      maxLength: { type: "string", default: "12", short: "M" },
	      limit: { type: "string", default: "100", short: "l" },
	      cacheDays: { type: "string", default: "30", short: "c" },
	    },
	  });
	  // Flags may be repeated and/or hold comma- or whitespace-separated lists.
	  const splitList = (values: string[]): string[] =>
	    values.flatMap((value) => value.split(/[\s,]+/).filter(Boolean));
	  const cfg: Config = {
	    // Accept ".rs" as well as "rs", and normalise case.
	    tlds: splitList(args.tlds).map((t) => t.replace(/^\.+/, "").toLowerCase()).filter(Boolean),
	    wordlist: args.wordlist,
	    filterlist: args.filterlist,
	    topics: args.topics ? splitList(args.topics).map((k) => k.toLowerCase()) : undefined,
	    offline: !!args.offline,
	    // `||` is deliberate: it also replaces NaN from non-numeric input.
	    minLength: Number(args.minLength) || 3,
	    maxLength: Number(args.maxLength) || 12,
	    limit: Number(args.limit) || 100,
	    cacheDays: Number(args.cacheDays) || 30,
	  };
	  if (cfg.tlds.length === 0) {
	    console.error("You must specify at least one TLD via --tlds.");
	    Deno.exit(1);
	  }
	  // Load filter list once
	  const allowlist = await loadFilterList(cfg.filterlist);
	  const ttlMs = cfg.cacheDays * 24 * 60 * 60 * 1000;
	  // Total order over availability so the comparator below is consistent:
	  // available (0) < unknown (1) < taken (2).
	  const rank = (r: CandidateResult): number =>
	    r.available === true ? 0 : r.available === undefined ? 1 : 2;
	  for (const tld of cfg.tlds) {
	    console.log(`Generating domain hacks for .${tld} …`);
	    const words = await loadCandidateWords(cfg, tld);
	    console.log(` ✔︎ Retrieved ${words.length} words ending in ${tld}`);
	    const candidates = generateCandidates(words, tld, cfg, allowlist);
	    console.log(` ✔︎ Generated ${candidates.length} candidate hacks`);
	    const results = await checkCandidates(candidates, ttlMs);
	    results.sort((a, b) => rank(a) - rank(b) || a.score - b.score);
	    if (results.length) {
	      console.log("\nResults for ." + tld);
	      console.log("========================\n");
	      let header = "Domain".padEnd(40);
	      header += " | ";
	      header += "Status".padEnd(12);
	      header += " | ";
	      header += "Score";
	      console.log("| " + header + " |");
	      // Separator row: keep the column pipes, turn everything else into dashes.
	      console.log("|-" + header.replace(/[^-|]/g, "-") + "-| ");
	      for (const res of results) {
	        const status = (res.available === undefined ? "unknown" : res.available ? "available" : "taken").padEnd(12);
	        const domain = res.domain.padEnd(40, ".");
	        const score = res.score.toFixed(2);
	        console.log(`| ${domain} | ${status} | ${score} |`);
	      }
	    }
	    console.log();
	  }
	}

	// Run the CLI only when `import.meta.main` is set for this module.
	if (import.meta.main) {
	  await main();
	}
No results found