Created
July 20, 2020 23:31
-
-
Save okomestudio/ee4f83938bd7ded092b84c5a65d4159c to your computer and use it in GitHub Desktop.
Convert the CS flash cards by jwasham to CSV for Anki import.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Convert the CS flash cards by jwasham to CSV for Anki import. | |
On import: | |
- Choose tab as the field separator | |
- Ignore lines where first field matches existing note | |
- Allow HTML in fields | |
Some tags are added (c, c++, python, code, and vocab) to distinguish the type of cards | |
and their content. Try importing into a separate deck in order to avoid contaminating | |
existing ones. | |
For the source data, see: https://github.com/jwasham/computer-science-flash-cards | |
""" | |
import csv | |
import re | |
import sqlite3 | |
import sys | |
from argparse import ArgumentParser | |
from contextlib import contextmanager | |
from enum import IntEnum | |
@contextmanager
def fopen(filename=None, mode="r"):
    """Open *filename*, falling back to a standard stream for ``None``/``"-"``.

    Args:
        filename: Path of the file to open. ``None`` or ``"-"`` selects a
            standard stream instead: ``sys.stdin`` when *mode* is read-only,
            ``sys.stdout`` for every other mode.
        mode: Mode string passed to :func:`open` for real files; it also
            decides which standard stream is used.

    Yields:
        The opened file object, or the selected standard stream.
    """
    if filename is None or filename == "-":
        read_only = "r" in mode and "+" not in mode
        stream = sys.stdin if read_only else sys.stdout
        try:
            yield stream
        finally:
            # The standard streams belong to the interpreter: closing
            # sys.stdout here would break every later print()/write in the
            # process. Flushing is enough to push out what was written.
            if not read_only:
                stream.flush()
    else:
        with open(filename, mode) as f:
            yield f
class Type(IntEnum):
    """Card categories, compared against the ``type`` column of each card."""

    # Cards whose answer is plain text.
    VOCAB = 1
    # Cards whose answer is a code snippet.
    CODE = 2
def convert(input, output):
    """Export every card of a SQLite database to a tab-separated file.

    Args:
        input: Path of the SQLite database file. It must contain a ``cards``
            table with ``type``, ``front`` and ``back`` columns. (The name
            shadows the builtin but is kept for caller compatibility.)
        output: Destination forwarded unchanged to ``write_to_csv``.

    Raises:
        sqlite3.Error: If the database cannot be read or has no ``cards``
            table.
    """
    conn = sqlite3.connect(input)
    try:
        conn.text_factory = str
        rows = conn.execute("SELECT type, front, back FROM cards").fetchall()
    finally:
        # Release the database handle even when the query fails; all rows
        # are already in memory by the time the file is written.
        conn.close()
    write_to_csv(output, rows)
def infer_coding_language(front, back, tags):
    """Add language tags to *tags* based on simple text heuristics.

    The question text (*front*) is scanned case-insensitively for language
    names, and the answer text (*back*) is scanned case-sensitively for
    code-like fragments. *tags* is modified in place; nothing is returned.

    Note that a front mentioning "C++" also satisfies the standalone "C"
    pattern, so such a card receives both the ``c++`` and ``c`` tags.
    """
    # (pattern, tag) pairs, checked in this order.
    name_rules = (
        (r"\bC\+\+(?!\w)", "c++"),
        (r"\bPython\b", "python"),
        (r"\bC\b", "c"),
    )
    snippet_rules = (
        (r"\bdef [a-zA-Z]+", "python"),
        (r"\b(void|int) [a-zA-Z]+", "c"),
    )

    for pattern, label in name_rules:
        if re.search(pattern, front, re.IGNORECASE) is not None:
            tags.add(label)

    for pattern, label in snippet_rules:
        if re.search(pattern, back) is not None:
            tags.add(label)
def write_to_csv(output, items, tags=None):
    """Write cards as tab-separated rows of ``front``, ``back`` and tags.

    Args:
        output: Destination handed to ``fopen`` in write mode.
        items: Iterable of ``(type, front, back)`` triples.
        tags: Optional iterable of tags applied to every card. When it is
            non-empty, a leading ``tags: ...`` row is written as well.
    """
    # Normalise to a set so lists/tuples are accepted and the caller's
    # collection is never mutated.
    common_tags = set(tags) if tags else set()
    with fopen(output, "w") as f:
        writer = csv.writer(f, delimiter="\t", lineterminator="\n")
        if common_tags:
            writer.writerow([f"tags: {' '.join(sorted(common_tags))}"])
        for item_type, front, back in items:
            card_tags = set(common_tags)
            infer_coding_language(front, back, card_tags)
            if item_type == Type.CODE:
                card_tags.add("code")
                back = f"<pre>{back}</pre>"
            elif item_type == Type.VOCAB:
                card_tags.add("vocab")
                back = back.replace("\r\n", "<br>")
            # Sets of strings iterate in a run-dependent order; sorting keeps
            # the generated file identical from one run to the next.
            writer.writerow([front, back, " ".join(sorted(card_tags))])
if __name__ == "__main__":
    # Command-line entry point: one positional database path plus an
    # optional output path, both handed straight to convert().
    cli = ArgumentParser()
    cli.add_argument("input", help="SQLite database file")
    cli.add_argument("--output", "-o", help="Output CSV file", default=None)
    options = cli.parse_args()
    convert(options.input, options.output)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment