Skip to content

Instantly share code, notes, and snippets.

@jpivarski
Created June 19, 2025 21:49
Show Gist options
  • Save jpivarski/277ae45223302f33e0eae59d77987e01 to your computer and use it in GitHub Desktop.
Make Chinese Anki
import json
import os
import glob
import genanki
class Word:
    """One vocabulary entry read from the TSV; indices are assigned per group
    in file order, starting at 0."""

    # Maps group name -> last index handed out for that group (shared state).
    group_index = {}

    def __init__(self, group, hanzi, definition):
        self.group = group
        self.hanzi = hanzi
        self.definition = definition
        next_index = Word.group_index.get(group, -1) + 1
        Word.group_index[group] = next_index
        self.index = next_index

    def hint_filename(self):
        """Path of the cached chat-completion JSON for this word."""
        return "hints/{}-{:02d}-{}.json".format(self.group, self.index, self.hanzi)

    def image_filename(self):
        """Path of the generated illustration PNG for this word."""
        return "images/{}-{:02d}-{}.png".format(self.group, self.index, self.hanzi)
# Load the word list: one TSV row per word -- group, hanzi, definition.
with open("all-words.tsv") as file:
    all_words = []
    for row in file:
        all_words.append(Word(*row.rstrip().split("\t")))

# Fixed base ids so re-running the script updates the existing models and
# decks in Anki instead of creating duplicates.
uniqueid1 = 1778988700
uniqueid2 = 1450502968
# HTML/JS partial shared by the card templates: up to three animated
# stroke-order widgets (one per character of {{Answer}}) rendered with the
# Hanzi Writer library; clicking a widget replays its animation.
# NOTE(review): the CDN URL below looks mangled by the page scrape
# ("[email protected]" was presumably "hanzi-writer@<version>") -- TODO confirm the
# intended package version before shipping.
# The original repeated the same 14-line JS block three times; it is now
# generated from one template (the resulting string is identical).
_HANZI_BLOCK = """if (word.length > {i}) {{
writers.push(HanziWriter.create("hanzi{i}", word[{i}], {{
width: 100,
height: 100,
padding: 5,
showOutline: true,
strokeColor: '#0000ff',
strokeAnimationSpeed: 2,
delayBetweenStrokes: 100
}}));
document.getElementById("hanzi{i}").addEventListener("click", function() {{
writers[{i}].animateCharacter();
}});
}}
"""
hanzi_writer = (
    """
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/hanzi-writer.min.js"></script>
<div style="float: right;">
<div id="hanzi0"></div>
<div id="hanzi1"></div>
<div id="hanzi2"></div>
</div>
<script>
var word = "{{Answer}}";
var writers = [];
"""
    + "".join(_HANZI_BLOCK.format(i=i) for i in range(3))
    + "</script>\n"
)
# Quiz model ("type in the answer"): the question side shows the example
# sentence with the target word blanked out, plus picture, translation, and
# definition; the answer side reveals the word with stroke-order animations.
_card_css = """
.card {
font-family: arial;
font-size: 20px;
text-align: center;
color: black;
background-color: white;
}
.card.nightMode {
font-family: arial;
font-size: 20px;
text-align: center;
color: black;
background-color: white;
}
"""
_quiz_qfmt = """<div style="font-size: 40px;">{{ChineseBlank}}</div>
{{Picture}}
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
&ldquo;{{English}}&rdquo;<br><br>{{Definition}}
</div>
{{type:Answer}}
"""
_quiz_afmt = (
    """<div style="font-size: 40px;">{{Chinese}}</div>"""
    + hanzi_writer
    + """
{{Picture}}
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
&ldquo;{{English}}&rdquo;<br><br>{{Definition}}
</div>
{{type:Answer}}
<hr id=answer>
"""
)
model = genanki.Model(
    model_id=uniqueid1,
    name="Basic (type in the answer) (Jim)",
    fields=[
        {"name": n}
        for n in ("Chinese", "ChineseBlank", "English", "Definition", "Picture", "Answer")
    ],
    templates=[{"name": "Card 1", "qfmt": _quiz_qfmt, "afmt": _quiz_afmt}],
    css=_card_css,
    model_type=0,
)
# "Learn" variant: identical to the quiz model except the stroke-order
# animations are already visible on the question side as well.
_learn_qfmt = (
    """<div style="font-size: 40px;">{{ChineseBlank}}</div>"""
    + hanzi_writer
    + """
{{Picture}}
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
&ldquo;{{English}}&rdquo;<br><br>{{Definition}}
</div>
{{type:Answer}}
"""
)
model_learn = genanki.Model(
    model_id=uniqueid1 + 1,
    name="Basic (type in the answer) (Jim; Learn)",
    fields=model.fields,
    templates=[
        {
            "name": model.templates[0]["name"],
            "qfmt": _learn_qfmt,
            "afmt": model.templates[0]["afmt"],
        }
    ],
    css=model.css,
)
# "Focus" card body: the shared stroke-order widgets, enlarged from 100px to
# 200px and centered instead of floated, followed by just the definition and
# the type-in field (no sentence context).
_big_writer = hanzi_writer.replace("width: 100", "width: 200")
_big_writer = _big_writer.replace("height: 100", "height: 200")
_big_writer = _big_writer.replace("float: right;", "margin-left: auto; margin-right: auto;")
focus_format = _big_writer + """
<br clear="all">
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
{{Definition}}
</div>
{{type:Answer}}
"""
# "Focus" model: question and answer are the same enlarged stroke drill; the
# answer side only appends the divider Anki uses for type-in cards.
_focus_template = {
    "name": model.templates[0]["name"],
    "qfmt": focus_format,
    "afmt": focus_format + "\n<hr id=answer>\n",
}
model_focus = genanki.Model(
    model_id=uniqueid1 + 2,
    name="Basic (type in the answer) (Jim; Focus)",
    fields=model.fields,
    templates=[_focus_template],
    css=model.css,
)
# Assemble three parallel decks per word group -- "quiz" (word hidden),
# "learn" (strokes shown up front), and "focus" (big stroke drill) -- from
# the cached hints and images, then write everything into one .apkg file.
decks = {}
image_filenames = []
for word in all_words:
    # FIXME: this is partial
    if word.group == "HSK1_027":
        continue
    # Skip words whose hint JSON or illustration has not been generated yet.
    if not (os.path.exists(word.hint_filename()) and os.path.exists(word.image_filename())):
        continue
    if word.group not in decks:
        # Deck ids stay unique because len(decks) grows by one per creation.
        for kind, suffix in (("quiz", ""), ("learn", "_learn"), ("focus", "_focus")):
            decks[word.group + suffix] = genanki.Deck(
                uniqueid2 + len(decks), kind + "::" + word.group.replace("_", "::")
            )
    with open(word.hint_filename()) as file:
        response = json.load(file)
    content = json.loads(response["choices"][0]["message"]["content"])
    # Media files are flat inside the package, so strip the "images/" prefix.
    img_name = word.image_filename().split("/", 1)[1]
    picture = (
        f'<img src="{img_name}" '
        'style="width: 200px; float: left; margin-right: 10px; margin-bottom: 10px;">'
    )
    highlighted = content["sentence"].replace(
        word.hanzi, f'<span style="color: #0000ff;">{word.hanzi}</span>'
    )
    blanked = content["sentence"].replace(
        word.hanzi,
        '<span style="text-decoration: underline; color: #0000ff;">&nbsp;&nbsp;&nbsp;</span>',
    )
    note_fields = [
        highlighted,
        blanked,
        content["sentence_translation"],
        content["better_definition"],
        picture,
        word.hanzi,
    ]
    image_filenames.append(word.image_filename())
    decks[word.group].add_note(genanki.Note(model, list(note_fields)))
    decks[word.group + "_learn"].add_note(genanki.Note(model_learn, list(note_fields)))
    decks[word.group + "_focus"].add_note(genanki.Note(model_focus, list(note_fields)))

# One package containing every deck, sorted by name for a stable order.
package = genanki.Package([decks[name] for name in sorted(decks)])
package.media_files = image_filenames
package.write_to_file("hsk1.apkg")
import os
import base64
import json
import requests
# OpenAI API key taken from the environment; indexing (not .get) makes the
# script fail fast with a KeyError if the variable is unset.
GENERATE_CHINESE_FLASHCARDS = os.environ["GENERATE_CHINESE_FLASHCARDS"]
class Word:
    """One vocabulary entry; each group numbers its words 0, 1, 2, ... in
    the order they appear in the input file."""

    group_index = {}  # group name -> highest index assigned so far

    def __init__(self, group, hanzi, definition):
        self.group = group
        self.hanzi = hanzi
        self.definition = definition
        idx = type(self).group_index.get(group, -1) + 1
        type(self).group_index[group] = idx
        self.index = idx

    def hint_filename(self):
        """JSON cache of the chat-completion response for this word."""
        stem = f"{self.group}-{self.index:02d}-{self.hanzi}"
        return f"hints/{stem}.json"

    def image_filename(self):
        """PNG illustration generated for this word."""
        stem = f"{self.group}-{self.index:02d}-{self.hanzi}"
        return f"images/{stem}.png"
# Re-read the word list and collect the HSK1 vocabulary, which is later used
# to constrain the generated example sentences.
with open("all-words.tsv") as file:
    all_words = []
    for row in file:
        all_words.append(Word(*row.rstrip().split("\t")))
hsk1 = {word.hanzi for word in all_words if word.group.startswith("HSK1_")}
# Generation pass: for every HSK1 word, ask the chat model for a cleaned-up
# definition, an HSK1-only example sentence, and an image prompt (cached as
# JSON under hints/), then ask the image model to illustrate the sentence
# (cached as PNG under images/).  Both caches make the script resumable.
for word in all_words:
    if not word.group.startswith("HSK1_"):
        continue
    print(word.group, word.index, word.hanzi, word.definition)
    # BUG FIX: reset per word.  Previously `image_response` leaked across
    # iterations, so the cost report at the bottom either crashed with a
    # NameError (on the first word whose image already existed) or priced a
    # stale response from an earlier word.
    image_response = None
    if os.path.exists(word.hint_filename()):
        with open(word.hint_filename()) as file:
            response = json.load(file)
    else:
        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {GENERATE_CHINESE_FLASHCARDS}",
                },
                json={
                    "model": "gpt-4.1",
                    "messages": [
                        {
                            "role": "developer",
                            "content": 'You are an author of flashcards for learning Chinese words. Given a Chinese word and an English definition (separated by ":"), you respond with the following in JSON format: (1) a numeric score from 0 to 100 indicating the quality of the given definition, with higher scores for clarity and learning potential, not for comprehensiveness, (2) a clear, easy-to-learn English definition, which may or may not be the same as the given definition, (3) a very short Chinese sentence consisting exclusively of words in HSK1*, using the word that can be visualized as an image for the front of the flashcard, (4) a detailed prompt for `gpt-image-1` to generate that image in a soft apocalypse animation style, and (5) an English translation of that sentence.\n\n*Words in HSK1: '
                            + ", ".join(hsk1),
                        },
                        {"role": "user", "content": f"{word.hanzi}: {word.definition}"},
                    ],
                    # Constrain the reply to a fixed JSON schema so the fields
                    # can be parsed mechanically below.
                    "response_format": {
                        "type": "json_schema",
                        "json_schema": {
                            "name": "response",
                            "schema": {
                                "type": "object",
                                "properties": {
                                    "given_definition_quality": {"type": "integer"},
                                    "better_definition": {"type": "string"},
                                    "sentence": {"type": "string"},
                                    "image_prompt": {"type": "string"},
                                    "sentence_translation": {"type": "string"},
                                },
                                "required": [
                                    "given_definition_quality",
                                    "better_definition",
                                    "sentence",
                                    "image_prompt",
                                    "sentence_translation",
                                ],
                                "additionalProperties": False,
                            },
                        },
                    },
                },
            ).json()
            print(" prompts: ", len(response.get("choices", [])))
            if len(response.get("choices", [])):
                content = json.loads(response["choices"][0]["message"]["content"])
                # The cloze cards need the word verbatim in the sentence;
                # refuse to cache a response that doesn't contain it.
                if word.hanzi not in content["sentence"]:
                    raise RuntimeError(f"{word.hanzi} not in {content['sentence']} ({content['sentence_translation']})")
                with open(word.hint_filename(), "w") as file:
                    json.dump(response, file)
        except Exception as err:
            # Best-effort: log the failure and move on to the next word.
            print(f"{type(err).__name__}: {err}")
            continue
    if len(response.get("choices", [])) != 0:
        content = json.loads(response["choices"][0]["message"]["content"])
        print(" sentence: ", content["sentence_translation"])
    if len(response.get("choices", [])) != 0 and not os.path.exists(
        word.image_filename()
    ):
        try:
            image_response = requests.post(
                "https://api.openai.com/v1/images/generations",
                headers={
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {GENERATE_CHINESE_FLASHCARDS}",
                },
                json={
                    "model": "gpt-image-1",
                    "prompt": content["image_prompt"]
                    + "\n\nUse a soft apocalypse animation style, somewhere between science fiction and fantasy, with an emphasis on serenity and overgrown foliage. Don't put any text in the image.",
                    "output_format": "png",
                    "quality": "low",
                    "size": "1024x1024",
                },
            ).json()
            print(" images: ", len(image_response.get("data", [])))
            if len(image_response.get("data", [])) != 0:
                with open(word.image_filename(), "wb") as file:
                    file.write(base64.b64decode(image_response["data"][0]["b64_json"]))
        except Exception as err:
            print(f"{type(err).__name__}: {err}")
            continue
    # Report cost only when an image was actually generated this iteration.
    # Rates appear to be $2/$8 per 1M chat input/output tokens and $10/$40
    # per 1M image input/output tokens -- NOTE(review): verify these against
    # current OpenAI pricing.
    if image_response is not None and "usage" in response and "usage" in image_response:
        print(
            " cost: $"
            + str(
                2 * response["usage"]["prompt_tokens"] / 1e6
                + 8 * response["usage"]["completion_tokens"] / 1e6
                + 10 * image_response["usage"]["input_tokens"] / 1e6
                + 40 * image_response["usage"]["output_tokens"] / 1e6
            )
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment