This converts all Magic: the Gathering (MTG) cards into JSON, YAML, or both using the MTGJSON website, then formats them and strips the crap I don't really need in my app. I thought it was useful, and if anyone stumbles across this in the future, hopefully it helps them.
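If you just want to consume the output in your own code, each grouped file is plain JSON keyed by card name. Here is a minimal sketch of reading one group; the folder and file names are the script's defaults for `--sort cmc`, and the exact files present depend on the downloaded data. Run with `--no-yaml` (or without `yq` installed) if you want to keep the JSON files, since the YAML step deletes them after converting.

```python
import json
from pathlib import Path

# Default folder name produced by `python mtg_card_processor.py --sort cmc`.
# Which group files exist depends on the MTGJSON data that was downloaded.
group_file = Path("CardsGroupedBy_CMC") / "1.0.json"

with group_file.open(encoding="utf-8") as f:
    cards = json.load(f)

# Each key is a card name; the value is a single dict when the card has one
# atomic version, or a list of dicts when it has several.
for name, versions in list(cards.items())[:5]:
    print(name, versions if isinstance(versions, list) else [versions])
```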
#!/usr/bin/env python3
"""
mtg_card_processor.py – Download, sanitise and group MTGJSON AtomicCards
========================================================================
**Last updated:** 2025-06-14 (newline normalisation)
*Downloads ➜ prunes ➜ ASCII-cleans ➜ groups ➜ outputs JSON/YAML*
-----------------
Quick usage
-----------
```bash
# Interactive run (prompts for grouping key)
python mtg_card_processor.py

# Group by CMC, output YAML too
python mtg_card_processor.py --sort cmc

# Build every grouping (set, cmc, colors, types)
python mtg_card_processor.py --all
```
"""
import argparse
import json
import shutil
import subprocess
import sys
import time
from pathlib import Path
from typing import Dict, List

try:
    import requests
    from tqdm import tqdm
except ImportError:
    print("Missing dependencies. Install with: pip install requests tqdm")
    sys.exit(1)
###############################################################################
# CONSTANTS                                                                   #
###############################################################################
DOWNLOAD_URL = "https://mtgjson.com/api/v5/AtomicCards.json"
INPUT_FILE = Path("AtomicCards.json")
MINIMIZED_FILE = Path("AtomicCardsMinimized.json")

# Card fields dropped entirely before the minimised output is written.
REMOVE_KEYS = {
    "foreignData",
    "manaValue",
    "layout",
    "name",
    "identifiers",
    "purchaseUrls",
    "printings",
    "keywords",
    "leadershipSkills",
    "edhrecRank",
    "edhrecSaltiness",
    "subtypes",
    "supertypes",
    "legalities",
}
###############################################################################
# UNICODE → ASCII & NEWLINE NORMALISATION                                     #
###############################################################################
# Translation table mapping typographic punctuation to plain ASCII.
UNICODE_PUNCT_MAP = str.maketrans({
    "\u2014": "-",    # em-dash
    "\u2013": "-",    # en-dash
    "\u2015": "-",    # horizontal bar
    "\u2212": "-",    # minus sign
    "\u2018": "'",    # left single quote
    "\u2019": "'",    # right single quote / apostrophe
    "\u201C": '"',    # left double quote
    "\u201D": '"',    # right double quote
    "\u2026": "...",  # ellipsis
    "\u00A0": " ",    # non-breaking space
})


def scrub(text: str) -> str:
    """Return *text* with fancy punctuation → ASCII **and** newlines collapsed."""
    return text.translate(UNICODE_PUNCT_MAP).replace("\r", " ").replace("\n", " ")
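
# Illustrative behaviour of scrub() (the input string here is a made-up example):
#   scrub("Vigilance\u2014this creature can\u2019t be blocked.\nDraw a card.")
#   -> "Vigilance-this creature can't be blocked. Draw a card."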
###############################################################################
# DOWNLOAD + CLEANING                                                         #
###############################################################################
def download_atomic_cards() -> None:
    """Stream AtomicCards.json from MTGJSON with a progress bar."""
    print("[2/10] Downloading AtomicCards.json…")
    r = requests.get(DOWNLOAD_URL, stream=True)
    r.raise_for_status()
    total = int(r.headers.get("content-length", 0))
    block = 1 << 12  # 4 KiB chunks
    start = time.time()
    with INPUT_FILE.open("wb") as f, tqdm(total=total, unit="B", unit_scale=True,
                                          unit_divisor=1024, desc="Downloading", ncols=80) as bar:
        for chunk in r.iter_content(block):
            f.write(chunk)
            bar.update(len(chunk))
    print(f" ↪ Downloaded {total/1_048_576:.2f} MB in {time.time()-start:.1f}s")


def load_and_clean() -> Dict[str, List[dict]]:
    """Prune unwanted fields, ASCII-clean every string and write the minimised JSON."""
    with INPUT_FILE.open(encoding="utf-8") as f:
        raw = json.load(f)["data"]
    print("[3/10] Normalising & pruning…")
    out: Dict[str, List[dict]] = {}
    for name, versions in raw.items():
        out[name] = []
        for card in versions:
            c: dict = {}
            colours = card.get("colors")
            colour_id = card.get("colorIdentity")
            for k, v in card.items():
                if k in REMOVE_KEYS or v in (None, "", [], {}):
                    continue
                if isinstance(v, str):
                    v = scrub(v)
                if k == "rulings":
                    rulings = [scrub(r.get("text", "")) for r in v if "text" in r]
                    if rulings:
                        c["rulings"] = rulings
                    continue
                if k == "convertedManaCost":
                    c["CMC"] = v  # rename for brevity
                    continue
                c[k] = v
            # Keep colorIdentity only when it differs from colors.
            if colours:
                c["colors"] = colours
                if colour_id and sorted(colours) != sorted(colour_id):
                    c["colorIdentity"] = colour_id
            elif colour_id:
                c["colorIdentity"] = colour_id
            # Collapse power/toughness into a single "PT" field.
            if "power" in card and "toughness" in card:
                c["PT"] = f"{card['power']}/{card['toughness']}"
            c["firstPrinting"] = card.get("firstPrinting", "UNKNOWN")
            out[name].append(c)
    print("[4/10] Cleaned. Writing minimised JSON…")
    with MINIMIZED_FILE.open("w", encoding="utf-8") as f:
        json.dump(out, f, indent=2)
    print(f"[5/10] ➜ {MINIMIZED_FILE}")
    return out
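
# Illustrative shape of one cleaned entry (field values here are hypothetical):
# {
#   "Lightning Bolt": [
#     {
#       "colors": ["R"],
#       "manaCost": "{R}",
#       "text": "Lightning Bolt deals 3 damage to any target.",
#       "type": "Instant",
#       "types": ["Instant"],
#       "CMC": 1.0,
#       "firstPrinting": "LEA"
#     }
#   ]
# }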
###############################################################################
# GROUPING + OUTPUT                                                           #
###############################################################################
def group_cards(data: Dict[str, List[dict]], key: str) -> Dict[str, Dict[str, List[dict]]]:
    """Bucket every card version under a group label derived from *key*."""
    grp: Dict[str, Dict[str, List[dict]]] = {}
    for name, versions in data.items():
        for v in versions:
            if key == "set":
                g = v.get("firstPrinting", "UNKNOWN")
            elif key == "cmc":
                g = str(v.get("CMC", "UNKNOWN"))
            elif key == "colors":
                g = "-".join(sorted(v.get("colors") or ["Colorless"]))
            elif key == "types":
                t = v.get("types") or v.get("type") or ["UNKNOWN"]
                g = "-".join(sorted(t)) if isinstance(t, list) else t
            else:
                raise ValueError(f"Unknown grouping key: {key}")
            grp.setdefault(g, {}).setdefault(name, []).append(v)
    print(f"[6/10] Grouped by {key}")
    return grp
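
# Example group labels (illustrative, not exhaustive): "LEA" for --sort set,
# "3.0" for cmc, "B-R" or "Colorless" for colors, "Artifact-Creature" for types.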
def write_grouped_json(grouped: Dict[str, Dict[str, List[dict]]], dest: Path) -> None:
    """Write one JSON file per group into *dest* (wiped first if it already exists)."""
    if dest.exists():
        shutil.rmtree(dest)
    dest.mkdir()
    for g, cards in grouped.items():
        p = dest / f"{g}.json"
        # Single-version cards are flattened to a dict; multi-version ones stay a list.
        serial = {n: vs[0] if len(vs) == 1 else vs for n, vs in cards.items()}
        with p.open("w", encoding="utf-8") as f:
            json.dump(serial, f, indent=2)
        print(f" ↪ {p.relative_to(dest.parent)}")
def json_to_yaml(folder: Path) -> None:
    """Convert each grouped JSON file to YAML via `yq`, then delete the JSON.

    Assumes the Go implementation of yq (v4+) is on PATH; `-P -oy` pretty-prints
    the JSON input as YAML.
    """
    if shutil.which("yq") is None:
        print("❌ `yq` not found – YAML skipped.")
        return

    def strip(v):
        # Drop the grouping helper field and rename isFunny -> Funny before output.
        if isinstance(v, dict):
            v.pop("firstPrinting", None)
            if "isFunny" in v:
                v["Funny"] = v.pop("isFunny")
        elif isinstance(v, list):
            for i in v:
                strip(i)

    for jf in folder.glob("*.json"):
        yf = jf.with_suffix(".yaml")
        with jf.open(encoding="utf-8") as f:
            data = json.load(f)
        for entry in data.values():
            strip(entry)
        tmp = jf.with_suffix(".tmp.json")
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        subprocess.run(f"yq -Poy {tmp} > {yf}", shell=True, check=True)
        jf.unlink()
        tmp.unlink()
        print(f" ↪ {jf.name} → {yf.name}")
###############################################################################
# CLI                                                                         #
###############################################################################
def choose_grouping() -> str:
    """Interactively ask which grouping criterion to use."""
    opts = [
        ("set", "first-printing set (default)"),
        ("cmc", "converted mana cost"),
        ("colors", "colour combination"),
        ("types", "card types"),
    ]
    print("Choose grouping criterion:")
    for i, (_, desc) in enumerate(opts, 1):
        print(f" {i}. {desc}")
    while True:
        sel = input("Enter choice [1]: ") or "1"
        if sel.isdigit() and 1 <= int(sel) <= len(opts):
            return opts[int(sel) - 1][0]
        print("Invalid choice – try again.")


def parse_args():
    p = argparse.ArgumentParser(description="MTG AtomicCards cleaner & grouper")
    mx = p.add_mutually_exclusive_group()
    mx.add_argument("-s", "--sort", choices=["set", "cmc", "colors", "types"], help="Grouping key")
    mx.add_argument("--all", action="store_true", help="Produce all four groupings in one run")
    p.add_argument("--no-yaml", action="store_true", help="Skip YAML conversion")
    return p.parse_args()


def main():
    a = parse_args()
    print("[1/10] Checking AtomicCards.json…")
    if INPUT_FILE.exists() and input("⚠️ File exists. Re-download? [y/N]: ").lower() == "y":
        INPUT_FILE.unlink()
    if not INPUT_FILE.exists():
        download_atomic_cards()
    data = load_and_clean()
    keys = ["set", "cmc", "colors", "types"] if a.all else [a.sort or choose_grouping()]
    for k in keys:
        grouped = group_cards(data, k)
        folder = Path(f"CardsGroupedBy_{k.upper()}")
        write_grouped_json(grouped, folder)
        if not a.no_yaml:
            json_to_yaml(folder)
    print("[10/10] ✅ Complete.")


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nInterrupted – exiting.")