Created
April 26, 2022 09:31
-
-
Save SebDeclercq/39f06e29c64a32a2a3c0356eddc81856 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "5b4430c9", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from __future__ import annotations\n", | |
| "from collections import defaultdict\n", | |
| "from pathlib import Path\n", | |
| "from typing import ClassVar, TextIO\n", | |
| "import csv\n", | |
| "import re\n", | |
| "\n", | |
| "\n", | |
| "class Book:\n", | |
| " '''Class representing a book w/ some of the metadata available\n", | |
| " in the oreilly-annotations.csv file\n", | |
| " \n", | |
| " Attrs:\n", | |
| " title: The book title, matched against the Book Title column of the CSV\n", | |
| " slug: The slug used to build the export filename (derived from the title when omitted)\n", | |
| " filename: The export path, as computed by the exporter\n", | |
| " chapters: The **ordered** list of the book's chapters\n", | |
| " highlights: A list of the highlights, chapter by chapter\n", | |
| " exporter: The Exporter to use as formatter\n", | |
| " '''\n", | |
| " \n", | |
| " def __init__(self, *, title: str, exporter: Exporter, slug: str = '') -> None:\n", | |
| " self.title: str = title\n", | |
| " self.slug: str = slug\n", | |
| " self.chapters: list[str] = []\n", | |
| " self.highlights: defaultdict[str, list[str]] = defaultdict(list)\n", | |
| " if not self.slug:\n", | |
| " self.slug = re.sub(r'[^\\w]+', '-', self.title.lower()).replace('--', '-')\n", | |
| " self.exporter: Exporter = exporter\n", | |
| "\n", | |
| " def get_highlights(self) -> defaultdict[str, list[str]]:\n", | |
| " self.highlights = defaultdict(list)\n", | |
| " return self.exporter.get_highlights(self).highlights\n", | |
| " \n", | |
| " @property\n", | |
| " def to_file(self) -> None:\n", | |
| " '''Export the highlights with the configured exporter'''\n", | |
| " self.exporter.export(self)\n", | |
| " \n", | |
| " @property\n", | |
| " def filename(self) -> str:\n", | |
| " return self.exporter.get_filename(self)\n", | |
| " \n", | |
| " \n", | |
| "class Exporter:\n", | |
| " '''Base exporter: parses the oreilly-annotations.csv file'''\n", | |
| " OREILLY_EXPORT_FILENAME: ClassVar[Path] = Path(\"oreilly-annotations.csv\")\n", | |
| " EXTENSION: ClassVar[str] = ''\n", | |
| " \n", | |
| " @classmethod\n", | |
| " def get_highlights(cls, book: Book) -> Book:\n", | |
| " '''Parse the csv file in order to collect all the highlights\n", | |
| " for a specific book.\n", | |
| " \n", | |
| " Params:\n", | |
| " book: The book to search for.\n", | |
| " \n", | |
| " Returns:\n", | |
| " The book fed w/ its chapters & highlights.\n", | |
| " \n", | |
| " '''\n", | |
| " with cls.OREILLY_EXPORT_FILENAME.open(encoding='utf8') as file:\n", | |
| " reader: csv.DictReader = csv.DictReader(file)\n", | |
| " for record in reader:\n", | |
| " if record['Book Title'] == book.title:\n", | |
| " chapter: str = record['Chapter Title']\n", | |
| " highlight: str = record['Highlight']\n", | |
| " if chapter not in book.chapters:\n", | |
| " book.chapters.insert(0, chapter)\n", | |
| " book.highlights[chapter].append(highlight)\n", | |
| " return book\n", | |
| "\n", | |
| " @classmethod\n", | |
| " def get_filename(cls, book: Book) -> str:\n", | |
| " return Path(f\"{book.slug}.{cls.EXTENSION}\")\n", | |
| " \n", | |
| " @classmethod\n", | |
| " def export(cls, book: Book) -> Path:\n", | |
| " raise NotImplementedError()\n", | |
| "\n", | |
| " \n", | |
| "class OReillyCSV2HTML(Exporter): \n", | |
| " HEADER: ClassVar[str] = '''<!DOCTYPE html>\n", | |
| " <html>\n", | |
| " <head>\n", | |
| " <meta http-equiv=\"Content-type\" content=\"text/html;charset=UTF-8\">\n", | |
| " <title>HTML_TITLE</title>\n", | |
| " <link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/npm/katex@0.10.2/dist/katex.min.css\" integrity=\"sha384-yFRtMMDnQtDRO8rLpMIKrtPCD5jdktao2TV19YiZYWMDkUR5GQZR/NOVTdquEx1j\" crossorigin=\"anonymous\">\n", | |
| " <link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css\">\n", | |
| " <link rel=\"stylesheet\" href=\"https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css\">\n", | |
| " <link href=\"https://cdn.jsdelivr.net/npm/katex-copytex@latest/dist/katex-copytex.min.css\" rel=\"stylesheet\" type=\"text/css\">\n", | |
| " <style>\n", | |
| ".task-list-item { list-style-type: none; } .task-list-item-checkbox { margin-left: -20px; vertical-align: middle; }\n", | |
| "</style>\n", | |
| " <style>\n", | |
| " body {\n", | |
| " font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', 'Ubuntu', 'Droid Sans', sans-serif;\n", | |
| " font-size: 14px;\n", | |
| " line-height: 1.6;\n", | |
| " }\n", | |
| " </style>\n", | |
| " \n", | |
| " <script src=\"https://cdn.jsdelivr.net/npm/katex-copytex@latest/dist/katex-copytex.min.js\"></script>\n", | |
| " </head>\n", | |
| " <body>'''\n", | |
| "\n", | |
| " FOOTER: ClassVar[str] = '''\n", | |
| " </body>\n", | |
| " </html>'''\n", | |
| " \n", | |
| " EXTENSION: ClassVar[str] = \".html\"\n", | |
| " \n", | |
| " @classmethod\n", | |
| " def export(cls, book: Book) -> Path:\n", | |
| " '''Export a book to HTML.\n", | |
| " \n", | |
| " Params:\n", | |
| " book: The book to export\n", | |
| " '''\n", | |
| " with book.filename.open('w', encoding='utf8') as file:\n", | |
| " file.write(cls.HEADER.replace('HTML_TITLE', book.title))\n", | |
| " file.write(f'<h1>{book.title}</h1>\\n')\n", | |
| " for chapter in book.chapters:\n", | |
| " file.write(f'<h2>{chapter}</h2>\\n')\n", | |
| " file.write('<ul>\\n')\n", | |
| " for highlight in book.highlights[chapter]:\n", | |
| " file.write('<li>\\n<blockquote>\\n')\n", | |
| " fmt_highlight = ''.join([f'<p>{line}</p>\\n' for line in highlight.split('\\n')])\n", | |
| " file.write(fmt_highlight)\n", | |
| " file.write('</blockquote>\\n</li>\\n')\n", | |
| " file.write('</ul>\\n')\n", | |
| " file.write(cls.FOOTER)\n", | |
| " return book.filename\n", | |
| " \n", | |
| " \n", | |
| " \n", | |
| "class OReillyCSV2MD(Exporter):\n", | |
| " '''Exporter writing the highlights of a book to a Markdown file'''\n", | |
| " \n", | |
| " EXTENSION: ClassVar[str] = \".md\"\n", | |
| " \n", | |
| " @classmethod\n", | |
| " def format(cls, string: str) -> str:\n", | |
| " return f\"{string}\\n\\n\"\n", | |
| " \n", | |
| " @classmethod\n", | |
| " def print(cls, file: TextIO, string: str) -> None:\n", | |
| " file.write(cls.format(string))\n", | |
| " \n", | |
| " @classmethod\n", | |
| " def export(cls, book: Book) -> Path:\n", | |
| " book.get_highlights()\n", | |
| " with book.filename.open(\"w\", encoding=\"utf8\") as file:\n", | |
| " for chapter in book.chapters:\n", | |
| " cls.print(file, f\"## {chapter}\")\n", | |
| " for highlight in book.highlights[chapter]:\n", | |
| " cleaned_highligth: str = '\\n>'.join(highlight.split('\\n'))\n", | |
| " cls.print(file, f\"- > {cleaned_highligth}\")\n", | |
| " return book.filename\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "167fb71b", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "titles: list[str] = [\n", | |
| " \"Extreme Programming Explained: Embrace Change, Second Edition\",\n", | |
| " \"Planning Extreme Programming\",\n", | |
| "]\n", | |
| "for title in titles:\n", | |
| " Book(title=title, exporter=OReillyCSV2MD).to_file" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.10.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment