okomestudio · July 20, 2020 23:31
diff --git a/jwasham-anki-builder.py b/jwasham-anki-builder.py
 #!/usr/bin/env python
 """Convert the CS flash cards by jwasham to CSV for Anki import.

 On import:

  - Choose tab as the field separator

  - Ignore lines where first field matches existing note

  - Allow HTML in fields

 Some tags are added (c, c++, python, code, and vocab) to distinguish the type of cards
 and their content. Try importing into a separate deck in order to avoid contaminating
 existing ones.

 For the source data, see: https://github.com/jwasham/computer-science-flash-cards
 """
 import csv
 import re
 import sqlite3
 import sys
 from argparse import ArgumentParser
 from contextlib import contextmanager
 from enum import IntEnum


 @contextmanager
 def fopen(filename=None, mode="r"):
     """Open ``filename``, or fall back to a standard stream.

     When ``filename`` is ``None`` or ``"-"`` no file is opened: ``sys.stdin`` is
     yielded for plain read modes and ``sys.stdout`` for every other mode.

     Args:
         filename: Path of the file to open, or ``None``/``"-"`` for a standard
             stream.
         mode: Mode string passed to :func:`open`; it also selects which
             standard stream is used.

     Yields:
         The opened file object or the selected standard stream.
     """
     if filename is None or filename == "-":
         reading = mode.startswith("r") and "+" not in mode
         stream = sys.stdin if reading else sys.stdout
         try:
             yield stream
         finally:
             # The standard streams belong to the interpreter: closing
             # sys.stdout here would break every later print(). Flushing is
             # enough to push buffered output out.
             if not reading:
                 stream.flush()
     else:
         with open(filename, mode) as f:
             yield f


 class Type(IntEnum):
     """Card categories, compared against the ``type`` value of each card row."""

     VOCAB = 1  # exported with the "vocab" tag
     CODE = 2  # exported with the "code" tag and a <pre>-wrapped back


 def convert(input, output):
     """Export every card stored in the SQLite database ``input``.

     Args:
         input: Path of the SQLite database; it is queried for the ``type``,
             ``front`` and ``back`` columns of its ``cards`` table.
         output: Destination handed unchanged to ``write_to_csv``.
     """
     conn = sqlite3.connect(input)
     try:
         conn.text_factory = str
         items = conn.execute("SELECT type, front, back FROM cards").fetchall()
     finally:
         # A sqlite3 connection is not closed automatically; release the
         # database file as soon as all rows are in memory.
         conn.close()
     write_to_csv(output, items)


 def infer_coding_language(front, back, tags):
     """Heuristically infer the coding language used.

     Language names are searched for (case-insensitively) in ``front`` and
     language-specific syntax in ``back``. Every match adds a tag to ``tags``
     in place; nothing is returned.

     Args:
         front: Text of the card's front side.
         back: Text of the card's back side.
         tags: Mutable set receiving any of ``"c"``, ``"c++"`` and ``"python"``.
     """
     if re.search(r"\bC\+\+(?!\w)", front, re.IGNORECASE):
         tags.add("c++")
     if re.search(r"\bPython\b", front, re.IGNORECASE):
         tags.add("python")
     # "+" and "#" are non-word characters, so a plain \bC\b would also match
     # the "C" of "C++" or "C#"; the lookahead keeps those from being tagged "c".
     if re.search(r"\bC\b(?!\+\+|#)", front, re.IGNORECASE):
         tags.add("c")

     # A function definition in the answer hints at Python ...
     if re.search(r"\bdef [a-zA-Z]+", back):
         tags.add("python")

     # ... and a void/int declaration hints at C.
     if re.search(r"\b(void|int) [a-zA-Z]+", back):
         tags.add("c")


 def write_to_csv(output, items, tags=None):
     """Write cards as tab-separated rows.

     Each row holds the card front, the card back (as HTML) and a
     space-separated tag list. Tags are written in sorted order so that the
     output is identical from run to run.

     Args:
         output: Destination handed to ``fopen`` (opened in ``"w"`` mode).
         items: Iterable of ``(type, front, back)`` tuples.
         tags: Optional tags applied to every card, given as an iterable of
             strings or one whitespace-separated string. When non-empty they
             are also written as a leading ``tags:`` row.
     """
     if isinstance(tags, str):
         tags = tags.split()
     # Copy into a set so any iterable is accepted and the caller's object is
     # never mutated.
     common_tags = set(tags) if tags else set()

     with fopen(output, "w") as f:
         writer = csv.writer(f, delimiter="\t", lineterminator="\n")
         if common_tags:
             writer.writerow([f"tags: {' '.join(sorted(common_tags))}"])

         for item_type, front, back in items:
             card_tags = set(common_tags)

             infer_coding_language(front, back, card_tags)

             if item_type == Type.CODE:
                 card_tags.add("code")
                 # Keep the whitespace of code answers intact when rendered.
                 back = f"<pre>{back}</pre>"
             elif item_type == Type.VOCAB:
                 card_tags.add("vocab")

             # Only CRLF line breaks are turned into HTML breaks.
             back = back.replace("\r\n", "<br>")

             writer.writerow([front, back, " ".join(sorted(card_tags))])


 if __name__ == "__main__":
     # Command-line entry point: one positional database path plus an optional
     # output path, both forwarded to convert().
     cli = ArgumentParser()
     cli.add_argument("input", help="SQLite database file")
     cli.add_argument("--output", "-o", help="Output CSV file", default=None)
     options = cli.parse_args()
     convert(options.input, options.output)
	#!/usr/bin/env python
	"""Convert the CS flash cards by jwasham to CSV for Anki import.

	On import:

	- Choose tab as the field separator

	- Ignore lines where first field matches existing note

	- Allow HTML in fields

	Some tags are added (c, c++, python, code, and vocab) to distinguish the type of cards
	and their content. Try importing into a separate deck in order to avoid contaminating
	existing ones.

	For the source data, see: https://github.com/jwasham/computer-science-flash-cards
	"""
	import csv
	import re
	import sqlite3
	import sys
	from argparse import ArgumentParser
	from contextlib import contextmanager
	from enum import IntEnum


	@contextmanager
	def fopen(filename=None, mode="r"):
	    """Open ``filename``, or fall back to a standard stream.

	    When ``filename`` is ``None`` or ``"-"`` no file is opened: ``sys.stdin`` is
	    yielded for plain read modes and ``sys.stdout`` for every other mode.

	    Args:
	        filename: Path of the file to open, or ``None``/``"-"`` for a standard
	            stream.
	        mode: Mode string passed to :func:`open`; it also selects which
	            standard stream is used.

	    Yields:
	        The opened file object or the selected standard stream.
	    """
	    if filename is None or filename == "-":
	        reading = mode.startswith("r") and "+" not in mode
	        stream = sys.stdin if reading else sys.stdout
	        try:
	            yield stream
	        finally:
	            # The standard streams belong to the interpreter: closing
	            # sys.stdout here would break every later print(). Flushing is
	            # enough to push buffered output out.
	            if not reading:
	                stream.flush()
	    else:
	        with open(filename, mode) as f:
	            yield f


	class Type(IntEnum):
	    """Card categories, compared against the ``type`` value of each card row."""

	    VOCAB = 1  # exported with the "vocab" tag
	    CODE = 2  # exported with the "code" tag and a <pre>-wrapped back


	def convert(input, output):
	    """Export every card stored in the SQLite database ``input``.

	    Args:
	        input: Path of the SQLite database; it is queried for the ``type``,
	            ``front`` and ``back`` columns of its ``cards`` table.
	        output: Destination handed unchanged to ``write_to_csv``.
	    """
	    conn = sqlite3.connect(input)
	    try:
	        conn.text_factory = str
	        items = conn.execute("SELECT type, front, back FROM cards").fetchall()
	    finally:
	        # A sqlite3 connection is not closed automatically; release the
	        # database file as soon as all rows are in memory.
	        conn.close()
	    write_to_csv(output, items)


	def infer_coding_language(front, back, tags):
	    """Heuristically infer the coding language used.

	    Language names are searched for (case-insensitively) in ``front`` and
	    language-specific syntax in ``back``. Every match adds a tag to ``tags``
	    in place; nothing is returned.

	    Args:
	        front: Text of the card's front side.
	        back: Text of the card's back side.
	        tags: Mutable set receiving any of ``"c"``, ``"c++"`` and ``"python"``.
	    """
	    if re.search(r"\bC\+\+(?!\w)", front, re.IGNORECASE):
	        tags.add("c++")
	    if re.search(r"\bPython\b", front, re.IGNORECASE):
	        tags.add("python")
	    # "+" and "#" are non-word characters, so a plain \bC\b would also match
	    # the "C" of "C++" or "C#"; the lookahead keeps those from being tagged "c".
	    if re.search(r"\bC\b(?!\+\+|#)", front, re.IGNORECASE):
	        tags.add("c")

	    # A function definition in the answer hints at Python ...
	    if re.search(r"\bdef [a-zA-Z]+", back):
	        tags.add("python")

	    # ... and a void/int declaration hints at C. The "|" must stay an
	    # unescaped alternation; "\|" would only match the literal text "void|int".
	    if re.search(r"\b(void|int) [a-zA-Z]+", back):
	        tags.add("c")


	def write_to_csv(output, items, tags=None):
	    """Write cards as tab-separated rows.

	    Each row holds the card front, the card back (as HTML) and a
	    space-separated tag list. Tags are written in sorted order so that the
	    output is identical from run to run.

	    Args:
	        output: Destination handed to ``fopen`` (opened in ``"w"`` mode).
	        items: Iterable of ``(type, front, back)`` tuples.
	        tags: Optional tags applied to every card, given as an iterable of
	            strings or one whitespace-separated string. When non-empty they
	            are also written as a leading ``tags:`` row.
	    """
	    if isinstance(tags, str):
	        tags = tags.split()
	    # Copy into a set so any iterable is accepted and the caller's object is
	    # never mutated.
	    common_tags = set(tags) if tags else set()

	    with fopen(output, "w") as f:
	        writer = csv.writer(f, delimiter="\t", lineterminator="\n")
	        if common_tags:
	            writer.writerow([f"tags: {' '.join(sorted(common_tags))}"])

	        for item_type, front, back in items:
	            card_tags = set(common_tags)

	            infer_coding_language(front, back, card_tags)

	            if item_type == Type.CODE:
	                card_tags.add("code")
	                # Keep the whitespace of code answers intact when rendered.
	                back = f"<pre>{back}</pre>"
	            elif item_type == Type.VOCAB:
	                card_tags.add("vocab")

	            # Only CRLF line breaks are turned into HTML breaks.
	            back = back.replace("\r\n", "<br>")

	            writer.writerow([front, back, " ".join(sorted(card_tags))])


	if __name__ == "__main__":
	    # Command-line entry point: one positional database path plus an optional
	    # output path, both forwarded to convert().
	    parser = ArgumentParser()
	    parser.add_argument("input", help="SQLite database file")
	    parser.add_argument("--output", "-o", help="Output CSV file", default=None)
	    args = parser.parse_args()
	    convert(args.input, args.output)