This converts all Magic: the Gathering (MTG) cards into JSON, YAML, or both using the MTGJSON website, then formats them and strips the crap I don't really need in my app. I thought it was useful, and if anyone stumbles across this in the future, hopefully it helps them.
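If you just want to consume the output in your own code, each grouped file is plain JSON keyed by card name. Here is a minimal sketch of reading one group; the folder and file names are the script's defaults for `--sort cmc`, and the exact files present depend on the downloaded data. Run with `--no-yaml` (or without `yq` installed) if you want to keep the JSON files, since the YAML step deletes them after converting.

```python
import json
from pathlib import Path

# Default folder name produced by `python mtg_card_processor.py --sort cmc`.
# Which group files exist depends on the MTGJSON data that was downloaded.
group_file = Path("CardsGroupedBy_CMC") / "1.0.json"

with group_file.open(encoding="utf-8") as f:
    cards = json.load(f)

# Each key is a card name; the value is a single dict when the card has one
# atomic version, or a list of dicts when it has several.
for name, versions in list(cards.items())[:5]:
    print(name, versions if isinstance(versions, list) else [versions])
```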
#!/usr/bin/env python3
"""
mtg_card_processor.py – Download, sanitise and group MTGJSON AtomicCards
========================================================================
**Last updated:** 2025-06-14 (newline normalisation)
*Downloads ➜ prunes ➜ ASCII-cleans ➜ groups ➜ outputs JSON/YAML*
-----------------
Quick usage
-----------
```bash
# Interactive run (prompts for grouping key)
python mtg_card_processor.py

# Group by CMC, output YAML too
python mtg_card_processor.py --sort cmc

# Build every grouping (set, cmc, colors, types)
python mtg_card_processor.py --all
```
"""
import argparse
import json
import shutil
import subprocess
import sys
import time
from pathlib import Path
from typing import Dict, List

try:
    import requests
    from tqdm import tqdm
except ImportError:
    print("Missing dependencies. Install with: pip install requests tqdm")
    sys.exit(1)
###############################################################################
# CONSTANTS                                                                   #
###############################################################################
DOWNLOAD_URL = "https://mtgjson.com/api/v5/AtomicCards.json"
INPUT_FILE = Path("AtomicCards.json")
MINIMIZED_FILE = Path("AtomicCardsMinimized.json")

# Card fields dropped entirely before the minimised output is written.
REMOVE_KEYS = {
    "foreignData",
    "manaValue",
    "layout",
    "name",
    "identifiers",
    "purchaseUrls",
    "printings",
    "keywords",
    "leadershipSkills",
    "edhrecRank",
    "edhrecSaltiness",
    "subtypes",
    "supertypes",
    "legalities",
}
###############################################################################
# UNICODE → ASCII & NEWLINE NORMALISATION                                     #
###############################################################################
# Translation table mapping typographic punctuation to plain ASCII.
UNICODE_PUNCT_MAP = str.maketrans({
    "\u2014": "-",    # em-dash
    "\u2013": "-",    # en-dash
    "\u2015": "-",    # horizontal bar
    "\u2212": "-",    # minus sign
    "\u2018": "'",    # left single quote
    "\u2019": "'",    # right single quote / apostrophe
    "\u201C": '"',    # left double quote
    "\u201D": '"',    # right double quote
    "\u2026": "...",  # ellipsis
    "\u00A0": " ",    # non-breaking space
})


def scrub(text: str) -> str:
    """Return *text* with fancy punctuation → ASCII **and** newlines collapsed."""
    return text.translate(UNICODE_PUNCT_MAP).replace("\r", " ").replace("\n", " ")
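
# Illustrative behaviour of scrub() (the input string here is a made-up example):
#   scrub("Vigilance\u2014this creature can\u2019t be blocked.\nDraw a card.")
#   -> "Vigilance-this creature can't be blocked. Draw a card."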
###############################################################################
# DOWNLOAD + CLEANING                                                         #
###############################################################################
def download_atomic_cards() -> None:
    """Stream AtomicCards.json from MTGJSON with a progress bar."""
    print("[2/10] Downloading AtomicCards.json…")
    r = requests.get(DOWNLOAD_URL, stream=True)
    r.raise_for_status()
    total = int(r.headers.get("content-length", 0))
    block = 1 << 12  # 4 KiB chunks
    start = time.time()
    with INPUT_FILE.open("wb") as f, tqdm(total=total, unit="B", unit_scale=True,
                                          unit_divisor=1024, desc="Downloading", ncols=80) as bar:
        for chunk in r.iter_content(block):
            f.write(chunk)
            bar.update(len(chunk))
    print(f" ↪ Downloaded {total/1_048_576:.2f} MB in {time.time()-start:.1f}s")


def load_and_clean() -> Dict[str, List[dict]]:
    """Prune unwanted fields, ASCII-clean every string and write the minimised JSON."""
    with INPUT_FILE.open(encoding="utf-8") as f:
        raw = json.load(f)["data"]
    print("[3/10] Normalising & pruning…")
    out: Dict[str, List[dict]] = {}
    for name, versions in raw.items():
        out[name] = []
        for card in versions:
            c: dict = {}
            colours = card.get("colors")
            colour_id = card.get("colorIdentity")
            for k, v in card.items():
                if k in REMOVE_KEYS or v in (None, "", [], {}):
                    continue
                if isinstance(v, str):
                    v = scrub(v)
                if k == "rulings":
                    rulings = [scrub(r.get("text", "")) for r in v if "text" in r]
                    if rulings:
                        c["rulings"] = rulings
                    continue
                if k == "convertedManaCost":
                    c["CMC"] = v  # rename for brevity
                    continue
                c[k] = v
            # Keep colorIdentity only when it differs from colors.
            if colours:
                c["colors"] = colours
                if colour_id and sorted(colours) != sorted(colour_id):
                    c["colorIdentity"] = colour_id
            elif colour_id:
                c["colorIdentity"] = colour_id
            # Collapse power/toughness into a single "PT" field.
            if "power" in card and "toughness" in card:
                c["PT"] = f"{card['power']}/{card['toughness']}"
            c["firstPrinting"] = card.get("firstPrinting", "UNKNOWN")
            out[name].append(c)
    print("[4/10] Cleaned. Writing minimised JSON…")
    with MINIMIZED_FILE.open("w", encoding="utf-8") as f:
        json.dump(out, f, indent=2)
    print(f"[5/10] ➜ {MINIMIZED_FILE}")
    return out
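
# Illustrative shape of one cleaned entry (field values here are hypothetical):
# {
#   "Lightning Bolt": [
#     {
#       "colors": ["R"],
#       "manaCost": "{R}",
#       "text": "Lightning Bolt deals 3 damage to any target.",
#       "type": "Instant",
#       "types": ["Instant"],
#       "CMC": 1.0,
#       "firstPrinting": "LEA"
#     }
#   ]
# }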
###############################################################################
# GROUPING + OUTPUT                                                           #
###############################################################################
def group_cards(data: Dict[str, List[dict]], key: str) -> Dict[str, Dict[str, List[dict]]]:
    """Bucket every card version under a group label derived from *key*."""
    grp: Dict[str, Dict[str, List[dict]]] = {}
    for name, versions in data.items():
        for v in versions:
            if key == "set":
                g = v.get("firstPrinting", "UNKNOWN")
            elif key == "cmc":
                g = str(v.get("CMC", "UNKNOWN"))
            elif key == "colors":
                g = "-".join(sorted(v.get("colors") or ["Colorless"]))
            elif key == "types":
                t = v.get("types") or v.get("type") or ["UNKNOWN"]
                g = "-".join(sorted(t)) if isinstance(t, list) else t
            else:
                raise ValueError(f"Unknown grouping key: {key}")
            grp.setdefault(g, {}).setdefault(name, []).append(v)
    print(f"[6/10] Grouped by {key}")
    return grp
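
# Example group labels (illustrative, not exhaustive): "LEA" for --sort set,
# "3.0" for cmc, "B-R" or "Colorless" for colors, "Artifact-Creature" for types.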
def write_grouped_json(grouped: Dict[str, Dict[str, List[dict]]], dest: Path) -> None:
    """Write one JSON file per group into *dest* (wiped first if it already exists)."""
    if dest.exists():
        shutil.rmtree(dest)
    dest.mkdir()
    for g, cards in grouped.items():
        p = dest / f"{g}.json"
        # Single-version cards are flattened to a dict; multi-version ones stay a list.
        serial = {n: vs[0] if len(vs) == 1 else vs for n, vs in cards.items()}
        with p.open("w", encoding="utf-8") as f:
            json.dump(serial, f, indent=2)
        print(f" ↪ {p.relative_to(dest.parent)}")
def json_to_yaml(folder: Path) -> None:
    """Convert each grouped JSON file to YAML via `yq`, then delete the JSON.

    Assumes the Go implementation of yq (v4+) is on PATH; `-P -oy` pretty-prints
    the JSON input as YAML.
    """
    if shutil.which("yq") is None:
        print("❌ `yq` not found – YAML skipped.")
        return

    def strip(v):
        # Drop the grouping helper field and rename isFunny -> Funny before output.
        if isinstance(v, dict):
            v.pop("firstPrinting", None)
            if "isFunny" in v:
                v["Funny"] = v.pop("isFunny")
        elif isinstance(v, list):
            for i in v:
                strip(i)

    for jf in folder.glob("*.json"):
        yf = jf.with_suffix(".yaml")
        with jf.open(encoding="utf-8") as f:
            data = json.load(f)
        for entry in data.values():
            strip(entry)
        tmp = jf.with_suffix(".tmp.json")
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        subprocess.run(f"yq -Poy {tmp} > {yf}", shell=True, check=True)
        jf.unlink()
        tmp.unlink()
        print(f" ↪ {jf.name} → {yf.name}")
###############################################################################
# CLI                                                                         #
###############################################################################
def choose_grouping() -> str:
    """Interactively ask which grouping criterion to use."""
    opts = [
        ("set", "first-printing set (default)"),
        ("cmc", "converted mana cost"),
        ("colors", "colour combination"),
        ("types", "card types"),
    ]
    print("Choose grouping criterion:")
    for i, (_, desc) in enumerate(opts, 1):
        print(f" {i}. {desc}")
    while True:
        sel = input("Enter choice [1]: ") or "1"
        if sel.isdigit() and 1 <= int(sel) <= len(opts):
            return opts[int(sel) - 1][0]
        print("Invalid choice – try again.")


def parse_args():
    p = argparse.ArgumentParser(description="MTG AtomicCards cleaner & grouper")
    mx = p.add_mutually_exclusive_group()
    mx.add_argument("-s", "--sort", choices=["set", "cmc", "colors", "types"], help="Grouping key")
    mx.add_argument("--all", action="store_true", help="Produce all four groupings in one run")
    p.add_argument("--no-yaml", action="store_true", help="Skip YAML conversion")
    return p.parse_args()


def main():
    a = parse_args()
    print("[1/10] Checking AtomicCards.json…")
    if INPUT_FILE.exists() and input("⚠️ File exists. Re-download? [y/N]: ").lower() == "y":
        INPUT_FILE.unlink()
    if not INPUT_FILE.exists():
        download_atomic_cards()
    data = load_and_clean()
    keys = ["set", "cmc", "colors", "types"] if a.all else [a.sort or choose_grouping()]
    for k in keys:
        grouped = group_cards(data, k)
        folder = Path(f"CardsGroupedBy_{k.upper()}")
        write_grouped_json(grouped, folder)
        if not a.no_yaml:
            json_to_yaml(folder)
    print("[10/10] ✅ Complete.")


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nInterrupted – exiting.")