Skip to content

Instantly share code, notes, and snippets.

@SebDeclercq
Created April 26, 2022 09:31
Show Gist options
  • Select an option

  • Save SebDeclercq/39f06e29c64a32a2a3c0356eddc81856 to your computer and use it in GitHub Desktop.

Select an option

Save SebDeclercq/39f06e29c64a32a2a3c0356eddc81856 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "5b4430c9",
"metadata": {},
"outputs": [],
"source": [
"from __future__ import annotations\n",
"from collections import defaultdict\n",
"from pathlib import Path\n",
"from typing import ClassVar, TextIO\n",
"import csv\n",
"import re\n",
"\n",
"\n",
"class Book:\n",
" '''Class representing a book with some of the metadata available\n",
" in the oreilly-annotations.csv export file.\n",
" \n",
" Attrs:\n",
" title: The book title, as found in the export's Book Title column\n",
" slug: The slug to use for the filename (for instance)\n",
" filename: The output path, built by the exporter from the slug\n",
" chapters: The **ordered** list of the book's chapters\n",
" highlights: The highlights, grouped by chapter title\n",
" exporter: The Exporter to use as formatter\n",
" '''\n",
" \n",
" def __init__(self, *, title: str, exporter: Exporter, slug: str = '') -> None:\n",
" self.title: str = title\n",
" self.slug: str = slug\n",
" self.chapters: list[str] = []\n",
" self.highlights: defaultdict[str, list[str]] = defaultdict(list)\n",
" if not self.slug:\n",
" self.slug = re.sub(r'[^\\w]+', '-', self.title.lower()).replace('--', '-')\n",
" self.exporter: Exporter = exporter\n",
"\n",
" def get_highlights(self) -> defaultdict[str, list[str]]:\n",
" self.highlights = defaultdict(list)\n",
" return self.exporter.get_highlights(self).highlights\n",
" \n",
" @property\n",
" def to_file(self) -> None:\n",
" '''Export the highlights using the configured exporter'''\n",
" self.exporter.export(self)\n",
" \n",
" @property\n",
" def filename(self) -> str:\n",
" return self.exporter.get_filename(self)\n",
" \n",
" \n",
"class Exporter:\n",
" '''Base exporter: parses the oreilly-annotations.csv file; subclasses implement export'''\n",
" OREILLY_EXPORT_FILENAME: ClassVar[Path] = Path(\"oreilly-annotations.csv\")\n",
" EXTENSION: ClassVar[str] = ''\n",
" \n",
" @classmethod\n",
" def get_highlights(cls, book: Book) -> Book:\n",
" '''Parse the csv file in order to collect all the highlights\n",
" for a specific book.\n",
" \n",
" Params:\n",
" book: The book to search for.\n",
" \n",
" Returns:\n",
" The same book, populated with its chapters and highlights.\n",
" \n",
" '''\n",
" with cls.OREILLY_EXPORT_FILENAME.open(encoding='utf8') as file:\n",
" reader: csv.DictReader = csv.DictReader(file)\n",
" for record in reader:\n",
" if record['Book Title'] == book.title:\n",
" chapter: str = record['Chapter Title']\n",
" highlight: str = record['Highlight']\n",
" if chapter not in book.chapters:\n",
" book.chapters.insert(0, chapter)\n",
" book.highlights[chapter].append(highlight)\n",
" return book\n",
"\n",
" @classmethod\n",
" def get_filename(cls, book: Book) -> str:\n",
" return Path(f\"{book.slug}.{cls.EXTENSION}\")\n",
" \n",
" @classmethod\n",
" def export(cls, book: Book) -> Path:\n",
" raise NotImplementedError()\n",
"\n",
" \n",
"class OReillyCSV2HTML(Exporter): \n",
" HEADER: ClassVar[str] = '''<!DOCTYPE html>\n",
" <html>\n",
" <head>\n",
" <meta http-equiv=\"Content-type\" content=\"text/html;charset=UTF-8\">\n",
" <title>HTML_TITLE</title>\n",
" <link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/npm/katex@0.10.2/dist/katex.min.css\" integrity=\"sha384-yFRtMMDnQtDRO8rLpMIKrtPCD5jdktao2TV19YiZYWMDkUR5GQZR/NOVTdquEx1j\" crossorigin=\"anonymous\">\n",
" <link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css\">\n",
" <link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css\">\n",
" <link href=\"https://cdn.jsdelivr.net/npm/katex-copytex@latest/dist/katex-copytex.min.css\" rel=\"stylesheet\" type=\"text/css\">\n",
" <style>\n",
".task-list-item { list-style-type: none; } .task-list-item-checkbox { margin-left: -20px; vertical-align: middle; }\n",
"</style>\n",
" <style>\n",
" body {\n",
" font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', 'Ubuntu', 'Droid Sans', sans-serif;\n",
" font-size: 14px;\n",
" line-height: 1.6;\n",
" }\n",
" </style>\n",
" \n",
" <script src=\"https://cdn.jsdelivr.net/npm/katex-copytex@latest/dist/katex-copytex.min.js\"></script>\n",
" </head>\n",
" <body>'''\n",
"\n",
" FOOTER: ClassVar[str] = '''\n",
" </body>\n",
" </html>'''\n",
" \n",
" EXTENSION: ClassVar[str] = \".html\"\n",
" \n",
" @classmethod\n",
" def export(cls, book: Book) -> Path:\n",
" '''Export a book to HTML.\n",
" \n",
" Params:\n",
" book: The book to export\n",
" '''\n",
" with book.filename.open('w', encoding='utf8') as file:\n",
" file.write(cls.HEADER.replace('HTML_TITLE', book.title))\n",
" file.write(f'<h1>{book.title}</h1>\\n')\n",
" for chapter in book.chapters:\n",
" file.write(f'<h2>{chapter}</h2>\\n')\n",
" file.write('<ul>\\n')\n",
" for highlight in book.highlights[chapter]:\n",
" file.write('<li>\\n<blockquote>\\n')\n",
" fmt_highlight = ''.join([f'<p>{line}</p>\\n' for line in highlight.split('\\n')])\n",
" file.write(fmt_highlight)\n",
" file.write('</blockquote>\\n</li>\\n')\n",
" file.write('</ul>\\n')\n",
" file.write(cls.FOOTER)\n",
" return book.filename\n",
" \n",
" \n",
" \n",
"class OReillyCSV2MD(Exporter):\n",
" '''Exporter writing a book's highlights to a Markdown file'''\n",
" \n",
" EXTENSION: ClassVar[str] = \".md\"\n",
" \n",
" @classmethod\n",
" def format(cls, string: str) -> str:\n",
" return f\"{string}\\n\\n\"\n",
" \n",
" @classmethod\n",
" def print(cls, file: TextIO, string: str) -> None:\n",
" file.write(cls.format(string))\n",
" \n",
" @classmethod\n",
" def export(cls, book: Book) -> Path:\n",
" book.get_highlights()\n",
" with book.filename.open(\"w\", encoding=\"utf8\") as file:\n",
" for chapter in book.chapters:\n",
" cls.print(file, f\"## {chapter}\")\n",
" for highlight in book.highlights[chapter]:\n",
" cleaned_highligth: str = '\\n>'.join(highlight.split('\\n'))\n",
" cls.print(file, f\"- > {cleaned_highligth}\")\n",
" return book.filename\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "167fb71b",
"metadata": {},
"outputs": [],
"source": [
"titles: list[str] = [\n",
" \"Extreme Programming Explained: Embrace Change, Second Edition\",\n",
" \"Planning Extreme Programming\",\n",
"]\n",
"for title in titles:\n",
" Book(title=title, exporter=OReillyCSV2MD).to_file"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.2"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment