Created
June 19, 2025 21:49
-
-
Save jpivarski/277ae45223302f33e0eae59d77987e01 to your computer and use it in GitHub Desktop.
Make Chinese Anki
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import glob | |
import genanki | |
class Word:
    """One vocabulary entry, numbered by its position within its group.

    ``group_index`` is shared by every instance: it maps a group name to the
    index most recently handed out for that group, so the first word created
    for a group gets index 0, the next one index 1, and so on.
    """

    group_index = {}

    def __init__(self, group, hanzi, definition):
        # Claim the next free slot in this group and record it in the shared counter.
        position = self.group_index.get(group, -1) + 1
        self.group_index[group] = position

        self.group = group
        self.hanzi = hanzi
        self.definition = definition
        self.index = position

    def _stem(self):
        """Return the ``<group>-<two-digit index>-<hanzi>`` part shared by both filenames."""
        return f"{self.group}-{self.index:02d}-{self.hanzi}"

    def hint_filename(self):
        """Return the path of this word's JSON hint file under ``hints/``."""
        return "hints/" + self._stem() + ".json"

    def image_filename(self):
        """Return the path of this word's PNG image under ``images/``."""
        return "images/" + self._stem() + ".png"
# Vocabulary list: one word per line as tab-separated group, hanzi and definition.
# The encoding is passed explicitly because the file holds Chinese text and the
# platform default encoding is not UTF-8 everywhere (assumes the file is UTF-8).
with open("all-words.tsv", encoding="utf-8") as file:
    # Blank lines are skipped; they would otherwise reach Word() with too few fields.
    all_words = [Word(*line.rstrip().split("\t")) for line in file if line.strip()]

# Base identifiers: the note-model ids count up from uniqueid1 and the deck ids
# count up from uniqueid2.
uniqueid1 = 1778988700
uniqueid2 = 1450502968
# HTML/JavaScript fragment embedded in the card templates. It loads HanziWriter from
# the jsDelivr CDN and draws every character of the note's "Answer" field in its own
# box (ids "hanzi0", "hanzi1", ...); clicking a box animates that character's strokes.
#
# One loop creates a box per character, so words of any length are covered.
# Array.from splits the word by code point rather than by UTF-16 unit.
#
# The substrings "width: 100", "height: 100" and "float: right;" are kept verbatim
# because they are rewritten with str.replace when the focus template is built.
hanzi_writer = """
<script src="https://cdn.jsdelivr.net/npm/hanzi-writer@3.5/dist/hanzi-writer.min.js"></script>
<div id="hanzi-writers" style="float: right;"></div>
<script>
var word = "{{Answer}}";
var writers = [];
var container = document.getElementById("hanzi-writers");
Array.from(word).forEach(function(character, position) {
    var box = document.createElement("div");
    box.id = "hanzi" + position;
    container.appendChild(box);
    writers.push(HanziWriter.create(box.id, character, {
        width: 100,
        height: 100,
        padding: 5,
        showOutline: true,
        strokeColor: '#0000ff',
        strokeAnimationSpeed: 2,
        delayBetweenStrokes: 100
    }));
    box.addEventListener("click", function() {
        writers[position].animateCharacter();
    });
});
</script>
"""
# Note model for the "quiz" cards. The question side shows the sentence with the word
# blanked out, the picture, the translation and definition, and a type-in box for the
# "Answer" field. The answer side shows the full sentence, the hanzi_writer fragment
# and the same picture/translation/definition block.
model = genanki.Model(
    name="Basic (type in the answer) (Jim)",
    model_id=uniqueid1,
    model_type=0,
    fields=[
        {"name": label}
        for label in ("Chinese", "ChineseBlank", "English", "Definition", "Picture", "Answer")
    ],
    templates=[
        dict(
            name="Card 1",
            qfmt="""<div style="font-size: 40px;">{{ChineseBlank}}</div>
{{Picture}}
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
“{{English}}”<br><br>{{Definition}}
</div>
{{type:Answer}}
""",
            afmt="".join(
                (
                    """<div style="font-size: 40px;">{{Chinese}}</div>""",
                    hanzi_writer,
                    """
{{Picture}}
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
“{{English}}”<br><br>{{Definition}}
</div>
{{type:Answer}}
<hr id=answer>
""",
                )
            ),
        )
    ],
    # The .card.nightMode rule repeats the .card rule, so both modes use black on white.
    css="""
.card {
font-family: arial;
font-size: 20px;
text-align: center;
color: black;
background-color: white;
}
.card.nightMode {
font-family: arial;
font-size: 20px;
text-align: center;
color: black;
background-color: white;
}
""",
)
# Note model for the "learn" cards: it reuses the fields, CSS, card name and answer
# side of `model`, but its question side also embeds the hanzi_writer fragment.
model_learn = genanki.Model(
    name="Basic (type in the answer) (Jim; Learn)",
    model_id=uniqueid1 + 1,
    fields=model.fields,
    css=model.css,
    templates=[
        dict(
            name=model.templates[0]["name"],
            qfmt="".join(
                (
                    """<div style="font-size: 40px;">{{ChineseBlank}}</div>""",
                    hanzi_writer,
                    """
{{Picture}}
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
“{{English}}”<br><br>{{Definition}}
</div>
{{type:Answer}}
""",
                )
            ),
            afmt=model.templates[0]["afmt"],
        )
    ],
)
# Card body for the "focus" cards: the hanzi_writer fragment with its boxes enlarged
# from 100 to 200 and its right float replaced by automatic side margins, followed
# by the definition and the type-in box.
focus_format = hanzi_writer.replace("width: 100", "width: 200")
focus_format = focus_format.replace("height: 100", "height: 200")
focus_format = focus_format.replace("float: right;", "margin-left: auto; margin-right: auto;")
focus_format += """
<br clear="all">
<div style="margin-top: 10px; margin-left: 10px; margin-right: 10px;">
{{Definition}}
</div>
{{type:Answer}}
"""
# Note model for the "focus" cards: both sides use focus_format, and the answer side
# appends the <hr id=answer> separator. Fields, CSS and card name come from `model`.
model_focus = genanki.Model(
    name="Basic (type in the answer) (Jim; Focus)",
    model_id=uniqueid1 + 2,
    fields=model.fields,
    css=model.css,
    templates=[
        dict(
            name=model.templates[0]["name"],
            qfmt=focus_format,
            afmt=focus_format + "\n<hr id=answer>\n",
        )
    ],
)
# Build three decks per word group ("quiz", "learn" and "focus"), add one note per
# word to each of them, and write all decks plus their images into one Anki package.
decks = {}
image_filenames = []

for word in all_words:
    # FIXME: this is partial
    if word.group == "HSK1_027":
        continue

    hint_path = word.hint_filename()
    image_path = word.image_filename()
    # Only words whose hint and image files both exist become cards.
    if not (os.path.exists(hint_path) and os.path.exists(image_path)):
        continue

    if word.group not in decks:
        deck_path = word.group.replace("_", "::")
        # Deck ids count up from uniqueid2 in creation order: len(decks) grows with
        # each insertion, so the three decks of a group get consecutive ids.
        decks[word.group] = genanki.Deck(uniqueid2 + len(decks), "quiz::" + deck_path)
        decks[word.group + "_learn"] = genanki.Deck(uniqueid2 + len(decks), "learn::" + deck_path)
        decks[word.group + "_focus"] = genanki.Deck(uniqueid2 + len(decks), "focus::" + deck_path)
    deck = decks[word.group]
    deck_learn = decks[word.group + "_learn"]
    deck_focus = decks[word.group + "_focus"]

    # Explicit encoding so reading does not depend on the platform's locale.
    with open(hint_path, encoding="utf-8") as file:
        response = json.load(file)
    # The hint file is a stored chat-completion reply whose message content is itself JSON.
    content = json.loads(response["choices"][0]["message"]["content"])

    # The <img> tag references the file without its leading "images/" directory.
    image = f"<img src=\"{image_path.split('/', 1)[1]}\" style=\"width: 200px; float: left; margin-right: 10px; margin-bottom: 10px;\">"
    chinese = content["sentence"].replace(word.hanzi, f"<span style=\"color: #0000ff;\">{word.hanzi}</span>")
    # NOTE(review): the blank inside the span is copied as a single space; confirm it
    # was not a wider or non-breaking space in the original file.
    chinese_blank = content["sentence"].replace(word.hanzi, "<span style=\"text-decoration: underline; color: #0000ff;\"> </span>")
    english = content["sentence_translation"]
    definition = content["better_definition"]

    image_filenames.append(image_path)

    # The same six field values go into one note per deck, each with its own model.
    fields = [chinese, chinese_blank, english, definition, image, word.hanzi]
    for note_model, target_deck in (
        (model, deck),
        (model_learn, deck_learn),
        (model_focus, deck_focus),
    ):
        target_deck.add_note(genanki.Note(note_model, list(fields)))

# Decks are packaged in the sorted order of their dictionary keys.
package = genanki.Package([decks[name] for name in sorted(decks)])
package.media_files = image_filenames
package.write_to_file("hsk1.apkg")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import base64 | |
import json | |
import requests | |
# API key read from the environment; it is sent as the Bearer token of the OpenAI
# requests made below. A missing variable raises KeyError here, before any request.
GENERATE_CHINESE_FLASHCARDS = os.environ["GENERATE_CHINESE_FLASHCARDS"]
class Word:
    """A vocabulary entry that knows its running number within its group.

    The class-level ``group_index`` dictionary is shared by all instances and
    stores, per group name, the last index given out; indices start at 0 and
    grow by one with every new word of the same group.
    """

    group_index = {}

    def __init__(self, group, hanzi, definition):
        # Take the next index for this group and store it back in the shared table.
        next_index = self.group_index.get(group, -1) + 1
        self.group_index[group] = next_index

        self.group = group
        self.hanzi = hanzi
        self.definition = definition
        self.index = next_index

    def _stem(self):
        """Return the ``<group>-<two-digit index>-<hanzi>`` part common to both filenames."""
        return f"{self.group}-{self.index:02d}-{self.hanzi}"

    def hint_filename(self):
        """Return the path of the JSON hint file for this word, under ``hints/``."""
        return "hints/" + self._stem() + ".json"

    def image_filename(self):
        """Return the path of the PNG image for this word, under ``images/``."""
        return "images/" + self._stem() + ".png"
# Vocabulary list: one word per line as tab-separated group, hanzi and definition.
# The encoding is passed explicitly because the file holds Chinese text and the
# platform default encoding is not UTF-8 everywhere (assumes the file is UTF-8).
with open("all-words.tsv", encoding="utf-8") as file:
    # Blank lines are skipped; they would otherwise reach Word() with too few fields.
    all_words = [Word(*line.rstrip().split("\t")) for line in file if line.strip()]

# The distinct hanzi of every word whose group name starts with "HSK1_".
hsk1 = {word.hanzi for word in all_words if word.group.startswith("HSK1_")}
# Prices used by the per-word cost estimate, in dollars per million tokens.
_CHAT_INPUT_PRICE = 2
_CHAT_OUTPUT_PRICE = 8
_IMAGE_INPUT_PRICE = 10
_IMAGE_OUTPUT_PRICE = 40

# (connect, read) timeouts in seconds, so a stalled connection cannot hang the run.
# The read timeout is generous because image generation can be slow.
_REQUEST_TIMEOUT = (10, 300)


def _post_openai(endpoint, payload):
    """POST ``payload`` as JSON to ``https://api.openai.com/v1/<endpoint>``.

    Returns the decoded JSON reply. If the reply carries an ``error`` entry it is
    printed, so a failed call is visible instead of only showing up as zero results.
    Network, timeout and JSON-decoding errors propagate to the caller.
    """
    reply = requests.post(
        "https://api.openai.com/v1/" + endpoint,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {GENERATE_CHINESE_FLASHCARDS}",
        },
        json=payload,
        timeout=_REQUEST_TIMEOUT,
    ).json()
    if isinstance(reply, dict) and "error" in reply:
        print(" error: ", reply["error"])
    return reply


def _hint_payload(word):
    """Return the chat-completions request body that asks for one word's flashcard hint."""
    return {
        "model": "gpt-4.1",
        "messages": [
            {
                "role": "developer",
                "content": 'You are an author of flashcards for learning Chinese words. Given a Chinese word and an English definition (separated by ":"), you respond with the following in JSON format: (1) a numeric score from 0 to 100 indicating the quality of the given definition, with higher scores for clarity and learning potential, not for comprehensiveness, (2) a clear, easy-to-learn English definition, which may or may not be the same as the given definition, (3) a very short Chinese sentence consisting exclusively of words in HSK1*, using the word that can be visualized as an image for the front of the flashcard, (4) a detailed prompt for `gpt-image-1` to generate that image in a soft apocalypse animation style, and (5) an English translation of that sentence.\n\n*Words in HSK1: '
                + ", ".join(hsk1),
            },
            {"role": "user", "content": f"{word.hanzi}: {word.definition}"},
        ],
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "name": "response",
                "schema": {
                    "type": "object",
                    "properties": {
                        "given_definition_quality": {"type": "integer"},
                        "better_definition": {"type": "string"},
                        "sentence": {"type": "string"},
                        "image_prompt": {"type": "string"},
                        "sentence_translation": {"type": "string"},
                    },
                    "required": [
                        "given_definition_quality",
                        "better_definition",
                        "sentence",
                        "image_prompt",
                        "sentence_translation",
                    ],
                    "additionalProperties": False,
                },
            },
        },
    }


def _image_payload(image_prompt):
    """Return the image-generation request body for ``image_prompt`` plus the shared style note."""
    return {
        "model": "gpt-image-1",
        "prompt": image_prompt
        + "\n\nUse a soft apocalypse animation style, somewhere between science fiction and fantasy, with an emphasis on serenity and overgrown foliage. Don't put any text in the image.",
        "output_format": "png",
        "quality": "low",
        "size": "1024x1024",
    }


# For every HSK1 word: obtain its hint (cached in a JSON file), then its image
# (cached as a PNG). Failures are reported and the loop moves on to the next word,
# so the script can simply be re-run to fill in whatever is still missing.
for word in all_words:
    if not word.group.startswith("HSK1_"):
        continue
    print(word.group, word.index, word.hanzi, word.definition)

    hint_path = word.hint_filename()
    image_path = word.image_filename()

    if os.path.exists(hint_path):
        with open(hint_path, encoding="utf-8") as file:
            response = json.load(file)
    else:
        try:
            # Create the output directory before making the paid request.
            os.makedirs(os.path.dirname(hint_path), exist_ok=True)
            response = _post_openai("chat/completions", _hint_payload(word))
            print(" prompts: ", len(response.get("choices", [])))
            if not response.get("choices"):
                # Nothing usable came back; do not cache it, so a later run retries.
                continue
            content = json.loads(response["choices"][0]["message"]["content"])
            if word.hanzi not in content["sentence"]:
                raise RuntimeError(f"{word.hanzi} not in {content['sentence']} ({content['sentence_translation']})")
            with open(hint_path, "w", encoding="utf-8") as file:
                json.dump(response, file)
        except Exception as err:
            print(f"{type(err).__name__}: {err}")
            continue

    # A cached reply without choices has no sentence and no image prompt to use.
    if not response.get("choices"):
        continue
    content = json.loads(response["choices"][0]["message"]["content"])
    print(" sentence: ", content["sentence_translation"])

    if os.path.exists(image_path):
        continue

    try:
        os.makedirs(os.path.dirname(image_path), exist_ok=True)
        image_response = _post_openai("images/generations", _image_payload(content["image_prompt"]))
        print(" images: ", len(image_response.get("data", [])))
        if image_response.get("data"):
            with open(image_path, "wb") as file:
                file.write(base64.b64decode(image_response["data"][0]["b64_json"]))
    except Exception as err:
        print(f"{type(err).__name__}: {err}")
        continue

    # The cost is only reported when an image request was made for this word, so
    # image_response is always the reply that belongs to it.
    if "usage" in response and "usage" in image_response:
        print(
            " cost: $"
            + str(
                _CHAT_INPUT_PRICE * response["usage"]["prompt_tokens"] / 1e6
                + _CHAT_OUTPUT_PRICE * response["usage"]["completion_tokens"] / 1e6
                + _IMAGE_INPUT_PRICE * image_response["usage"]["input_tokens"] / 1e6
                + _IMAGE_OUTPUT_PRICE * image_response["usage"]["output_tokens"] / 1e6
            )
        )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment