Skip to content

Instantly share code, notes, and snippets.

@NextdoorPsycho
Created June 14, 2025 08:34
Show Gist options
  • Save NextdoorPsycho/ef785df317fc9a96db019e76f4d3ceb1 to your computer and use it in GitHub Desktop.
Save NextdoorPsycho/ef785df317fc9a96db019e76f4d3ceb1 to your computer and use it in GitHub Desktop.
This converts all Magic: The Gathering (MTG) cards into JSON, YAML, or both, using data from the MTGJSON website, then formats them and strips the data I don't really need in my app. I thought it was useful, and if anyone stumbles across this in the future, hopefully it helps them.
#!/usr/bin/env python3
"""
mtg_card_processor.py – Download, sanitise and group MTGJSON AtomicCards
========================================================================
**Last updated:** 2025-06-14 (newline normalisation)
*Downloads ➜ prunes ➜ ASCII-cleans ➜ groups ➜ outputs JSON/YAML*
-----------------
Quick usage
-----------
```bash
# Interactive run (prompts for grouping key)
python mtg_card_processor.py
# Group by CMC, output YAML too
python mtg_card_processor.py --sort cmc
# Build every grouping (set, cmc, colors, types)
python mtg_card_processor.py --all
```
"""
import argparse
import json
import shutil
import subprocess
import sys
import time
from pathlib import Path
from typing import Dict, List
try:
import requests
from tqdm import tqdm
except ImportError:
print("Missing dependencies. Install with: pip install requests tqdm")
sys.exit(1)
###############################################################################
# CONSTANTS #
###############################################################################
# Remote source of the card data, plus the two local files this script
# works with: the raw download and the pruned copy written after cleaning.
DOWNLOAD_URL = "https://mtgjson.com/api/v5/AtomicCards.json"
INPUT_FILE = Path("AtomicCards.json")
MINIMIZED_FILE = Path("AtomicCardsMinimized.json")

# Card fields that are discarded outright during cleaning.  Only membership
# is ever tested, so the (alphabetical) order is purely for readability.
REMOVE_KEYS = {
    "edhrecRank",
    "edhrecSaltiness",
    "foreignData",
    "identifiers",
    "keywords",
    "layout",
    "leadershipSkills",
    "legalities",
    "manaValue",
    "name",
    "printings",
    "purchaseUrls",
    "subtypes",
    "supertypes",
}
###############################################################################
# UNICODE → ASCII & NEWLINE NORMALISATION #
###############################################################################
# Translation table mapping typographic punctuation to a plain-ASCII stand-in.
UNICODE_PUNCT_MAP = str.maketrans({
    "\u2014": "-",    # em-dash
    "\u2013": "-",    # en-dash
    "\u2015": "-",    # horizontal bar
    "\u2212": "-",    # minus
    "\u2018": "'",    # left single quote
    "\u2019": "'",    # right single quote / apostrophe
    "\u201C": '"',    # left double quote
    "\u201D": '"',    # right double quote
    "\u2026": "...",  # ellipsis
    "\u00A0": " ",    # nbsp
})


def scrub(text: str) -> str:
    """Return *text* with fancy punctuation → ASCII **and** newlines collapsed.

    Every line break is replaced by exactly one space.  A carriage-return /
    line-feed pair counts as a single break, so CRLF text does not end up
    with doubled spaces.  Characters outside ``UNICODE_PUNCT_MAP`` are left
    untouched.
    """
    flattened = text.translate(UNICODE_PUNCT_MAP)
    # Replace the two-character CRLF first; handling "\r" and "\n" separately
    # would turn one Windows line break into two spaces.
    return flattened.replace("\r\n", " ").replace("\r", " ").replace("\n", " ")
###############################################################################
# DOWNLOAD + CLEANING #
###############################################################################
def download_atomic_cards() -> None:
    """Stream the file at ``DOWNLOAD_URL`` into ``INPUT_FILE`` with a progress bar.

    The body is written to a sibling ``*.part`` file and only renamed onto
    ``INPUT_FILE`` once the transfer has finished, so an interrupted or failed
    download never leaves a truncated file that a later run would accept as
    complete.

    Raises:
        requests.RequestException: on connection errors, timeouts or a
            non-success HTTP status.
        OSError: if the destination cannot be written.
    """
    print("[2/10] Downloading AtomicCards.json…")
    partial = INPUT_FILE.with_name(INPUT_FILE.name + ".part")
    chunk_size = 1 << 16  # 64 KiB keeps per-chunk overhead low
    received = 0
    start = time.time()
    try:
        # (connect, read) timeouts: without them a stalled server hangs forever.
        with requests.get(DOWNLOAD_URL, stream=True, timeout=(10, 60)) as r:
            r.raise_for_status()
            # The header may be missing; 0 then means "size unknown".
            expected = int(r.headers.get("content-length", 0))
            with partial.open("wb") as f, tqdm(
                total=expected or None,
                unit="B",
                unit_scale=True,
                unit_divisor=1024,
                desc="Downloading",
                ncols=80,
            ) as bar:
                for chunk in r.iter_content(chunk_size):
                    f.write(chunk)
                    received += len(chunk)
                    bar.update(len(chunk))
        partial.replace(INPUT_FILE)
    except BaseException:
        # Also covers Ctrl-C: discard the incomplete download, then re-raise.
        if partial.exists():
            partial.unlink()
        raise
    # Report the bytes actually written rather than the advertised length.
    print(f" ↪ Downloaded {received/1_048_576:.2f} MB in {time.time()-start:.1f}s")
def _clean_card(card: dict) -> dict:
    """Return a pruned, ASCII-normalised copy of one card entry."""
    colours = card.get("colors")
    colour_id = card.get("colorIdentity")
    # colorIdentity is only worth keeping when it says something that
    # "colors" does not already say.
    identity_is_redundant = (
        bool(colours and colour_id) and sorted(colours) == sorted(colour_id)
    )

    cleaned: dict = {}
    for key, value in card.items():
        # Drop unwanted fields and anything empty (0 / False are kept).
        if key in REMOVE_KEYS or value in (None, "", [], {}):
            continue
        if key == "colorIdentity" and identity_is_redundant:
            continue
        if key == "rulings":
            # Keep only the ruling text of each ruling entry.
            texts = [scrub(r.get("text", "")) for r in value if "text" in r]
            if texts:
                cleaned["rulings"] = texts
            continue
        if isinstance(value, str):
            value = scrub(value)
        if key == "convertedManaCost":
            cleaned["CMC"] = value
            continue
        cleaned[key] = value

    if "power" in card and "toughness" in card:
        cleaned["PT"] = f"{card['power']}/{card['toughness']}"
    cleaned["firstPrinting"] = card.get("firstPrinting", "UNKNOWN")
    return cleaned


def load_and_clean() -> Dict[str, List[dict]]:
    """Load ``INPUT_FILE``, clean every card and write ``MINIMIZED_FILE``.

    For each card entry under the file's top-level ``"data"`` mapping:

    * fields listed in ``REMOVE_KEYS`` and empty values are dropped;
    * string values are passed through ``scrub``;
    * ``rulings`` is reduced to a list of scrubbed ruling texts;
    * ``convertedManaCost`` is renamed to ``CMC``;
    * ``colorIdentity`` is omitted when it holds the same colours as
      ``colors`` (previously it was always copied, which made the
      comparison a no-op);
    * ``PT`` ("power/toughness") is added when both parts are present;
    * ``firstPrinting`` is always set, defaulting to ``"UNKNOWN"``.

    Returns:
        Mapping of card name to its list of cleaned entries.
    """
    with INPUT_FILE.open(encoding="utf-8") as f:
        raw = json.load(f)["data"]
    print("[3/10] Normalising & pruning…")
    out: Dict[str, List[dict]] = {
        name: [_clean_card(card) for card in versions]
        for name, versions in raw.items()
    }
    print("[4/10] Cleaned. Writing minimised JSON…")
    with MINIMIZED_FILE.open("w", encoding="utf-8") as f:
        json.dump(out, f, indent=2)
    print(f"[5/10] ➜ {MINIMIZED_FILE}")
    return out
###############################################################################
# GROUPING + OUTPUT #
###############################################################################
def _types_group(card: dict) -> str:
    """Label a card by its sorted ``types`` list, or its ``type`` value as fallback."""
    kinds = card.get("types") or card.get("type") or ["UNKNOWN"]
    return "-".join(sorted(kinds)) if isinstance(kinds, list) else kinds


# Grouping key -> function that derives the group label from one cleaned card.
_GROUP_NAMERS = {
    "set": lambda card: card.get("firstPrinting", "UNKNOWN"),
    "cmc": lambda card: str(card.get("CMC", "UNKNOWN")),
    "colors": lambda card: "-".join(sorted(card.get("colors") or ["Colorless"])),
    "types": _types_group,
}


def group_cards(data: Dict[str, List[dict]], key: str) -> Dict[str, Dict[str, List[dict]]]:
    """Bucket cleaned cards by *key*.

    Args:
        data: mapping of card name to its list of cleaned entries.
        key: one of ``"set"``, ``"cmc"``, ``"colors"`` or ``"types"``.

    Returns:
        ``{group label: {card name: [entries in that group]}}``.

    Raises:
        ValueError: if *key* is not a supported grouping.  This is checked
            before any card is looked at, so a bad key is reported even when
            *data* is empty.
    """
    namer = _GROUP_NAMERS.get(key)
    if namer is None:
        raise ValueError(key)

    grp: Dict[str, Dict[str, List[dict]]] = {}
    for name, versions in data.items():
        for version in versions:
            grp.setdefault(namer(version), {}).setdefault(name, []).append(version)
    print(f"[6/10] Grouped by {key}")
    return grp
# Characters that cannot appear in a file name on Windows and/or POSIX.
_UNSAFE_FILENAME_CHARS = '<>:"/\\|?*'
# Device names Windows refuses to use as a file stem, whatever the extension.
_RESERVED_DEVICE_NAMES = (
    {"CON", "PRN", "AUX", "NUL"}
    | {f"COM{n}" for n in range(1, 10)}
    | {f"LPT{n}" for n in range(1, 10)}
)


def _group_filename(group: str) -> str:
    """Turn a group label into a ``.json`` file name that stays inside one folder."""
    stem = "".join(
        "_" if ch in _UNSAFE_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in str(group)
    )
    if not stem or stem.upper() in _RESERVED_DEVICE_NAMES:
        stem += "_"
    return f"{stem}.json"


def write_grouped_json(grouped: Dict[str, Dict[str, List[dict]]], dest: Path) -> None:
    """Write one ``<group>.json`` file per group into a freshly created *dest*.

    Any existing *dest* directory is deleted first.  Inside each file a card
    that has a single entry is stored as that entry directly; cards with
    several entries keep their list.

    Group labels are sanitised before being used as file names: path
    separators and other characters invalid in file names become ``_``, and
    labels that are empty or collide with a reserved Windows device name get
    a trailing ``_``.  Two labels that sanitise to the same name would share
    one file (the later one wins).
    """
    if dest.exists():
        shutil.rmtree(dest)
    dest.mkdir(parents=True)
    for group, cards in grouped.items():
        path = dest / _group_filename(group)
        serial = {
            name: versions[0] if len(versions) == 1 else versions
            for name, versions in cards.items()
        }
        with path.open("w", encoding="utf-8") as f:
            json.dump(serial, f, indent=2)
        print(f" ↪ {path.relative_to(dest.parent)}")
def _strip_for_yaml(node) -> None:
    """Drop ``firstPrinting`` and rename ``isFunny`` → ``Funny`` in place.

    Applies to *node* itself when it is a dict, or to every element when it
    is a list; other values are left alone.
    """
    if isinstance(node, dict):
        node.pop("firstPrinting", None)
        if "isFunny" in node:
            node["Funny"] = node.pop("isFunny")
    elif isinstance(node, list):
        for item in node:
            _strip_for_yaml(item)


def json_to_yaml(folder: Path) -> None:
    """Convert every ``*.json`` file in *folder* to YAML using the ``yq`` tool.

    Each JSON file is replaced by a ``.yaml`` file of the same stem; the JSON
    original is deleted after a successful conversion.  If ``yq`` is not on
    the PATH a message is printed and nothing is converted.

    Raises:
        subprocess.CalledProcessError: if ``yq`` exits with an error.
    """
    if shutil.which("yq") is None:
        print("❌ `yq` not found – YAML skipped.")
        return
    # Take the listing up front: the temporary files created below also match
    # "*.json" and must not be picked up as inputs.
    for jf in list(folder.glob("*.json")):
        yf = jf.with_suffix(".yaml")
        with jf.open(encoding="utf-8") as f:
            data = json.load(f)
        for entry in data.values():
            _strip_for_yaml(entry)
        tmp = jf.with_suffix(".tmp.json")
        try:
            with tmp.open("w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            # Argument list + redirected stdout instead of a shell string, so
            # spaces or shell metacharacters in file names cannot break (or
            # hijack) the command.
            with yf.open("wb") as out:
                subprocess.run(["yq", "-Poy", str(tmp)], stdout=out, check=True)
        finally:
            # Never leave the intermediate file behind, even when yq fails.
            if tmp.exists():
                tmp.unlink()
        jf.unlink()
        print(f" ↪ {jf.name} → {yf.name}")
###############################################################################
# CLI #
###############################################################################
def choose_grouping() -> str:
    """Interactively ask which grouping to use and return its key.

    Prints a numbered menu and keeps prompting until a valid number is
    entered.  An empty answer selects option 1.

    Returns:
        One of ``"set"``, ``"cmc"``, ``"colors"`` or ``"types"``.
    """
    opts = [
        ("set", "first-printing set (default)"),
        ("cmc", "converted mana cost"),
        ("colors", "colour combination"),
        ("types", "card types"),
    ]
    print("Choose grouping criterion:")
    for number, (_, desc) in enumerate(opts, 1):
        print(f" {number}. {desc}")
    while True:
        sel = input("Enter choice [1]: ").strip() or "1"
        # isdecimal(), not isdigit(): the latter accepts characters such as
        # "²" that int() cannot parse, which would crash instead of re-prompting.
        if sel.isdecimal() and 1 <= int(sel) <= len(opts):
            return opts[int(sel) - 1][0]
        print("Invalid choice – try again.")
def parse_args(argv=None):
    """Parse the command-line options.

    Args:
        argv: optional list of argument strings; when ``None`` (the default)
            argparse reads ``sys.argv[1:]``.

    Returns:
        Namespace with ``sort`` (grouping key or ``None``), ``all`` (bool)
        and ``no_yaml`` (bool).  ``--sort`` and ``--all`` are mutually
        exclusive.
    """
    # The text is a description; passed positionally it would be taken as the
    # program name and garble the usage line.
    parser = argparse.ArgumentParser(description="MTG AtomicCards cleaner & grouper")
    exclusive = parser.add_mutually_exclusive_group()
    exclusive.add_argument(
        "-s", "--sort",
        choices=["set", "cmc", "colors", "types"],
        help="Grouping key",
    )
    exclusive.add_argument(
        "--all",
        action="store_true",
        help="Produce all four groupings in one run",
    )
    parser.add_argument("--no-yaml", action="store_true", help="Skip YAML conversion")
    return parser.parse_args(argv)
def main():
    """Run the whole pipeline: fetch (if needed), clean, group and export.

    An existing input file is reused unless the user answers "y" to the
    re-download prompt.  One output folder ``CardsGroupedBy_<KEY>`` is
    produced per requested grouping, converted to YAML unless ``--no-yaml``
    was given.
    """
    args = parse_args()

    print("[1/10] Checking AtomicCards.json…")
    if INPUT_FILE.exists():
        answer = input("⚠️ File exists. Re-download? [y/N]: ")
        if answer.lower() == "y":
            INPUT_FILE.unlink()
    if not INPUT_FILE.exists():
        download_atomic_cards()

    cards = load_and_clean()

    # --all builds every grouping; otherwise use --sort or ask the user.
    if args.all:
        grouping_keys = ["set", "cmc", "colors", "types"]
    else:
        grouping_keys = [args.sort or choose_grouping()]

    for grouping_key in grouping_keys:
        by_group = group_cards(cards, grouping_key)
        out_dir = Path(f"CardsGroupedBy_{grouping_key.upper()}")
        write_grouped_json(by_group, out_dir)
        if args.no_yaml:
            continue
        json_to_yaml(out_dir)

    print("[10/10] ✅ Complete.")
# Script entry point: run the pipeline and turn Ctrl-C into a clean exit
# instead of a traceback.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\nInterrupted – exiting.")
        # Exit non-zero so calling scripts can tell the run did not finish;
        # 130 (128 + SIGINT) is the usual shell status for an interrupt.
        sys.exit(130)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment