Skip to content

Instantly share code, notes, and snippets.

@jpivarski
Created June 19, 2025 21:49
Show Gist options
  • Save jpivarski/277ae45223302f33e0eae59d77987e01 to your computer and use it in GitHub Desktop.
Make Chinese Anki
import json
import os
import glob
import genanki
class Word:
    """One vocabulary entry read from the TSV; indices are assigned per group
    in file order, starting at 0."""

    # Maps group name -> last index handed out for that group (shared state).
    group_index = {}

    def __init__(self, group, hanzi, definition):
        self.group = group
        self.hanzi = hanzi
        self.definition = definition
        next_index = Word.group_index.get(group, -1) + 1
        Word.group_index[group] = next_index
        self.index = next_index

    def hint_filename(self):
        """Path of the cached chat-completion JSON for this word."""
        return "hints/{}-{:02d}-{}.json".format(self.group, self.index, self.hanzi)

    def image_filename(self):
        """Path of the generated illustration PNG for this word."""
        return "images/{}-{:02d}-{}.png".format(self.group, self.index, self.hanzi)
# Load the word list: one TSV row per word -- group, hanzi, definition.
with open("all-words.tsv") as file:
    all_words = []
    for row in file:
        all_words.append(Word(*row.rstrip().split("\t")))

# Fixed base ids so re-running the script updates the existing models and
# decks in Anki instead of creating duplicates.
uniqueid1 = 1778988700
uniqueid2 = 1450502968
# HTML/JS partial shared by the card templates: up to three animated
# stroke-order widgets (one per character of {{Answer}}) rendered with the
# Hanzi Writer library; clicking a widget replays its animation.
# NOTE(review): the CDN URL below looks mangled by the page scrape
# ("[email protected]" was presumably "hanzi-writer@<version>") -- TODO confirm the
# intended package version before shipping.
# The original repeated the same 14-line JS block three times; it is now
# generated from one template (the resulting string is identical).
_HANZI_BLOCK = """if (word.length > {i}) {{
writers.push(HanziWriter.create("hanzi{i}", word[{i}], {{
width: 100,
height: 100,
padding: 5,
showOutline: true,
strokeColor: '#0000ff',
strokeAnimationSpeed: 2,
delayBetweenStrokes: 100
}}));
document.getElementById("hanzi{i}").addEventListener("click", function() {{
writers[{i}].animateCharacter();
}});
}}
"""
hanzi_writer = (
    """
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/hanzi-writer.min.js"></script>
<div style="float: right;">
<div id="hanzi0"></div>
<div id="hanzi1"></div>
<div id="hanzi2"></div>
</div>
<script>
var word = "{{Answer}}";
var writers = [];
"""
    + "".join(_HANZI_BLOCK.format(i=i) for i in range(3))
    + "</script>\n"
)
# Quiz model ("type in the answer"): the question side shows the example
# sentence with the target word blanked out, plus picture, translation, and
# definition; the answer side reveals the word with stroke-order animations.
_card_css = """
.card {
font-family: arial;
font-size: 20px;
text-align: center;
color: black;
background-color: white;
}
.card.nightMode {
font-family: arial;
font-size: 20px;
text-align: center;
color: black;
background-color: white;
}
"""
_quiz_qfmt = """<div style="font-size: 40px;">{{ChineseBlank}}</div>
{{Picture}}
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
&ldquo;{{English}}&rdquo;<br><br>{{Definition}}
</div>
{{type:Answer}}
"""
_quiz_afmt = (
    """<div style="font-size: 40px;">{{Chinese}}</div>"""
    + hanzi_writer
    + """
{{Picture}}
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
&ldquo;{{English}}&rdquo;<br><br>{{Definition}}
</div>
{{type:Answer}}
<hr id=answer>
"""
)
model = genanki.Model(
    model_id=uniqueid1,
    name="Basic (type in the answer) (Jim)",
    fields=[
        {"name": n}
        for n in ("Chinese", "ChineseBlank", "English", "Definition", "Picture", "Answer")
    ],
    templates=[{"name": "Card 1", "qfmt": _quiz_qfmt, "afmt": _quiz_afmt}],
    css=_card_css,
    model_type=0,
)
# "Learn" variant: identical to the quiz model except the stroke-order
# animations are already visible on the question side as well.
_learn_qfmt = (
    """<div style="font-size: 40px;">{{ChineseBlank}}</div>"""
    + hanzi_writer
    + """
{{Picture}}
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
&ldquo;{{English}}&rdquo;<br><br>{{Definition}}
</div>
{{type:Answer}}
"""
)
model_learn = genanki.Model(
    model_id=uniqueid1 + 1,
    name="Basic (type in the answer) (Jim; Learn)",
    fields=model.fields,
    templates=[
        {
            "name": model.templates[0]["name"],
            "qfmt": _learn_qfmt,
            "afmt": model.templates[0]["afmt"],
        }
    ],
    css=model.css,
)
# "Focus" card body: the shared stroke-order widgets, enlarged from 100px to
# 200px and centered instead of floated, followed by just the definition and
# the type-in field (no sentence context).
_big_writer = hanzi_writer.replace("width: 100", "width: 200")
_big_writer = _big_writer.replace("height: 100", "height: 200")
_big_writer = _big_writer.replace("float: right;", "margin-left: auto; margin-right: auto;")
focus_format = _big_writer + """
<br clear="all">
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
{{Definition}}
</div>
{{type:Answer}}
"""
# "Focus" model: question and answer are the same enlarged stroke drill; the
# answer side only appends the divider Anki uses for type-in cards.
_focus_template = {
    "name": model.templates[0]["name"],
    "qfmt": focus_format,
    "afmt": focus_format + "\n<hr id=answer>\n",
}
model_focus = genanki.Model(
    model_id=uniqueid1 + 2,
    name="Basic (type in the answer) (Jim; Focus)",
    fields=model.fields,
    templates=[_focus_template],
    css=model.css,
)
# Assemble three parallel decks per word group -- "quiz" (word hidden),
# "learn" (strokes shown up front), and "focus" (big stroke drill) -- from
# the cached hints and images, then write everything into one .apkg file.
decks = {}
image_filenames = []
for word in all_words:
    # FIXME: this is partial
    if word.group == "HSK1_027":
        continue
    # Skip words whose hint JSON or illustration has not been generated yet.
    if not (os.path.exists(word.hint_filename()) and os.path.exists(word.image_filename())):
        continue
    if word.group not in decks:
        # Deck ids stay unique because len(decks) grows by one per creation.
        for kind, suffix in (("quiz", ""), ("learn", "_learn"), ("focus", "_focus")):
            decks[word.group + suffix] = genanki.Deck(
                uniqueid2 + len(decks), kind + "::" + word.group.replace("_", "::")
            )
    with open(word.hint_filename()) as file:
        response = json.load(file)
    content = json.loads(response["choices"][0]["message"]["content"])
    # Media files are flat inside the package, so strip the "images/" prefix.
    img_name = word.image_filename().split("/", 1)[1]
    picture = (
        f'<img src="{img_name}" '
        'style="width: 200px; float: left; margin-right: 10px; margin-bottom: 10px;">'
    )
    highlighted = content["sentence"].replace(
        word.hanzi, f'<span style="color: #0000ff;">{word.hanzi}</span>'
    )
    blanked = content["sentence"].replace(
        word.hanzi,
        '<span style="text-decoration: underline; color: #0000ff;">&nbsp;&nbsp;&nbsp;</span>',
    )
    note_fields = [
        highlighted,
        blanked,
        content["sentence_translation"],
        content["better_definition"],
        picture,
        word.hanzi,
    ]
    image_filenames.append(word.image_filename())
    decks[word.group].add_note(genanki.Note(model, list(note_fields)))
    decks[word.group + "_learn"].add_note(genanki.Note(model_learn, list(note_fields)))
    decks[word.group + "_focus"].add_note(genanki.Note(model_focus, list(note_fields)))

# One package containing every deck, sorted by name for a stable order.
package = genanki.Package([decks[name] for name in sorted(decks)])
package.media_files = image_filenames
package.write_to_file("hsk1.apkg")
import os
import base64
import json
import requests
# OpenAI API key taken from the environment; indexing (not .get) makes the
# script fail fast with a KeyError if the variable is unset.
GENERATE_CHINESE_FLASHCARDS = os.environ["GENERATE_CHINESE_FLASHCARDS"]
class Word:
    """One vocabulary entry; each group numbers its words 0, 1, 2, ... in
    the order they appear in the input file."""

    group_index = {}  # group name -> highest index assigned so far

    def __init__(self, group, hanzi, definition):
        self.group = group
        self.hanzi = hanzi
        self.definition = definition
        idx = type(self).group_index.get(group, -1) + 1
        type(self).group_index[group] = idx
        self.index = idx

    def hint_filename(self):
        """JSON cache of the chat-completion response for this word."""
        stem = f"{self.group}-{self.index:02d}-{self.hanzi}"
        return f"hints/{stem}.json"

    def image_filename(self):
        """PNG illustration generated for this word."""
        stem = f"{self.group}-{self.index:02d}-{self.hanzi}"
        return f"images/{stem}.png"
# Re-read the word list and collect the HSK1 vocabulary, which is later used
# to constrain the generated example sentences.
with open("all-words.tsv") as file:
    all_words = []
    for row in file:
        all_words.append(Word(*row.rstrip().split("\t")))
hsk1 = {word.hanzi for word in all_words if word.group.startswith("HSK1_")}
# Generation pass: for every HSK1 word, ask the chat model for a cleaned-up
# definition, an HSK1-only example sentence, and an image prompt (cached as
# JSON under hints/), then ask the image model to illustrate the sentence
# (cached as PNG under images/).  Both caches make the script resumable.
for word in all_words:
    if not word.group.startswith("HSK1_"):
        continue
    print(word.group, word.index, word.hanzi, word.definition)
    # BUG FIX: reset per word.  Previously `image_response` leaked across
    # iterations, so the cost report at the bottom either crashed with a
    # NameError (on the first word whose image already existed) or priced a
    # stale response from an earlier word.
    image_response = None
    if os.path.exists(word.hint_filename()):
        with open(word.hint_filename()) as file:
            response = json.load(file)
    else:
        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers={
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {GENERATE_CHINESE_FLASHCARDS}",
                },
                json={
                    "model": "gpt-4.1",
                    "messages": [
                        {
                            "role": "developer",
                            "content": 'You are an author of flashcards for learning Chinese words. Given a Chinese word and an English definition (separated by ":"), you respond with the following in JSON format: (1) a numeric score from 0 to 100 indicating the quality of the given definition, with higher scores for clarity and learning potential, not for comprehensiveness, (2) a clear, easy-to-learn English definition, which may or may not be the same as the given definition, (3) a very short Chinese sentence consisting exclusively of words in HSK1*, using the word that can be visualized as an image for the front of the flashcard, (4) a detailed prompt for `gpt-image-1` to generate that image in a soft apocalypse animation style, and (5) an English translation of that sentence.\n\n*Words in HSK1: '
                            + ", ".join(hsk1),
                        },
                        {"role": "user", "content": f"{word.hanzi}: {word.definition}"},
                    ],
                    # Constrain the reply to a fixed JSON schema so the fields
                    # can be parsed mechanically below.
                    "response_format": {
                        "type": "json_schema",
                        "json_schema": {
                            "name": "response",
                            "schema": {
                                "type": "object",
                                "properties": {
                                    "given_definition_quality": {"type": "integer"},
                                    "better_definition": {"type": "string"},
                                    "sentence": {"type": "string"},
                                    "image_prompt": {"type": "string"},
                                    "sentence_translation": {"type": "string"},
                                },
                                "required": [
                                    "given_definition_quality",
                                    "better_definition",
                                    "sentence",
                                    "image_prompt",
                                    "sentence_translation",
                                ],
                                "additionalProperties": False,
                            },
                        },
                    },
                },
            ).json()
            print(" prompts: ", len(response.get("choices", [])))
            if len(response.get("choices", [])):
                content = json.loads(response["choices"][0]["message"]["content"])
                # The cloze cards need the word verbatim in the sentence;
                # refuse to cache a response that doesn't contain it.
                if word.hanzi not in content["sentence"]:
                    raise RuntimeError(f"{word.hanzi} not in {content['sentence']} ({content['sentence_translation']})")
                with open(word.hint_filename(), "w") as file:
                    json.dump(response, file)
        except Exception as err:
            # Best-effort: log the failure and move on to the next word.
            print(f"{type(err).__name__}: {err}")
            continue
    if len(response.get("choices", [])) != 0:
        content = json.loads(response["choices"][0]["message"]["content"])
        print(" sentence: ", content["sentence_translation"])
    if len(response.get("choices", [])) != 0 and not os.path.exists(
        word.image_filename()
    ):
        try:
            image_response = requests.post(
                "https://api.openai.com/v1/images/generations",
                headers={
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {GENERATE_CHINESE_FLASHCARDS}",
                },
                json={
                    "model": "gpt-image-1",
                    "prompt": content["image_prompt"]
                    + "\n\nUse a soft apocalypse animation style, somewhere between science fiction and fantasy, with an emphasis on serenity and overgrown foliage. Don't put any text in the image.",
                    "output_format": "png",
                    "quality": "low",
                    "size": "1024x1024",
                },
            ).json()
            print(" images: ", len(image_response.get("data", [])))
            if len(image_response.get("data", [])) != 0:
                with open(word.image_filename(), "wb") as file:
                    file.write(base64.b64decode(image_response["data"][0]["b64_json"]))
        except Exception as err:
            print(f"{type(err).__name__}: {err}")
            continue
    # Report cost only when an image was actually generated this iteration.
    # Rates appear to be $2/$8 per 1M chat input/output tokens and $10/$40
    # per 1M image input/output tokens -- NOTE(review): verify these against
    # current OpenAI pricing.
    if image_response is not None and "usage" in response and "usage" in image_response:
        print(
            " cost: $"
            + str(
                2 * response["usage"]["prompt_tokens"] / 1e6
                + 8 * response["usage"]["completion_tokens"] / 1e6
                + 10 * image_response["usage"]["input_tokens"] / 1e6
                + 40 * image_response["usage"]["output_tokens"] / 1e6
            )
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment