Skip to content

Instantly share code, notes, and snippets.

@SebDeclercq
Created July 10, 2020 13:42
Show Gist options
  • Save SebDeclercq/6f2653719e4ee3852a635f84f1a22ed1 to your computer and use it in GitHub Desktop.
Save SebDeclercq/6f2653719e4ee3852a635f84f1a22ed1 to your computer and use it in GitHub Desktop.
import csv
import html
import re
from collections import defaultdict
from pathlib import Path
from typing import ClassVar, DefaultDict, List, Tuple
class Book:
    '''A book together with the metadata collected from the
    safari-annotations-export.csv file.

    Attrs:
        title: The title of the book.
        slug: The slug to use for the filename. When not supplied, it is
            derived from the title.
        html_filename: The filename to use for the HTML export
            (``<slug>.html``).
        chapters: The **ordered** list of the book's chapters.
        highlights: The highlights, grouped by chapter title.
    '''

    def __init__(self, *, title: str, slug: str = '') -> None:
        '''Create a book that has no chapters or highlights yet.

        Params:
            title: The title of the book.
            slug: Optional slug; when empty, one is built from the title.
        '''
        self.title: str = title
        self.slug: str = slug
        self.chapters: List[str] = []
        self.highlights: DefaultDict[str, List[str]] = defaultdict(list)
        if not self.slug:
            # Collapse every run of NON-word characters (spaces, punctuation)
            # into a single dash and trim dashes left at either end, so that
            # 'My Book!' becomes 'My-Book'.
            self.slug = re.sub(r'\W+', '-', self.title).strip('-')
        self.html_filename: Path = Path(f'{self.slug}.html')

    @property
    def to_html(self) -> None:
        '''Export the highlights to HTML.

        This is a property, so simply accessing ``book.to_html`` triggers
        the export to ``html_filename``; it always evaluates to ``None``.
        '''
        OReillyCSV2HTML.export_to_html(self)
class OReillyCSV2HTML:
    '''safari-annotations-export.csv file parser and HTML exporter.

    Attrs:
        OREILLY_EXPORT_FILENAME: Path of the CSV export to read.
        HEADER: Opening part of the generated HTML page; the ``HTML_TITLE``
            placeholder is replaced by the book title.
        FOOTER: Closing part of the generated HTML page.
    '''

    OREILLY_EXPORT_FILENAME: ClassVar[Path] = Path('safari-annotations-export.csv')
    HEADER: ClassVar[str] = '''<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html;charset=UTF-8">
<title>HTML_TITLE</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.10.2/dist/katex.min.css" integrity="sha384-yFRtMMDnQtDRO8rLpMIKrtPCD5jdktao2TV19YiZYWMDkUR5GQZR/NOVTdquEx1j" crossorigin="anonymous">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css">
<link href="https://cdn.jsdelivr.net/npm/katex-copytex@latest/dist/katex-copytex.min.css" rel="stylesheet" type="text/css">
<style>
.task-list-item { list-style-type: none; } .task-list-item-checkbox { margin-left: -20px; vertical-align: middle; }
</style>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', 'Ubuntu', 'Droid Sans', sans-serif;
font-size: 14px;
line-height: 1.6;
}
</style>
<script src="https://cdn.jsdelivr.net/npm/katex-copytex@latest/dist/katex-copytex.min.js"></script>
</head>
<body>'''
    FOOTER: ClassVar[str] = '''
</body>
</html>'''

    @classmethod
    def get_highlights(cls, book: 'Book') -> 'Book':
        '''Parse the csv file in order to collect all the highlights
        for a specific book.

        Rows are matched on the exact value of the ``Book Title`` column.
        The book is updated in place and also returned.

        Params:
            book: The book to search for.
        Returns:
            The book fed w/ its chapters & highlights.
        Raises:
            OSError: If the export file cannot be opened.
            KeyError: If a matching row lacks one of the expected columns.
        '''
        with cls.OREILLY_EXPORT_FILENAME.open(encoding='utf8') as file:
            for record in csv.DictReader(file):
                if record['Book Title'] != book.title:
                    continue
                chapter: str = record['Chapter Title']
                if chapter not in book.chapters:
                    # NOTE(review): new chapters are prepended, so chapters
                    # end up in the reverse of their first appearance in the
                    # file. Presumably the export lists the latest entries
                    # first -- confirm against a real export.
                    book.chapters.insert(0, chapter)
                book.highlights[chapter].append(record['Highlight'])
        return book

    @classmethod
    def _render_html(cls, book: 'Book') -> str:
        '''Build the complete HTML document for a book as one string.'''
        # Everything coming from the CSV is plain text: escape it so that
        # characters such as '<' or '&' cannot break or inject markup.
        title = html.escape(book.title)
        parts: List[str] = [
            cls.HEADER.replace('HTML_TITLE', title),
            f'<h1>{title}</h1>\n',
        ]
        for chapter in book.chapters:
            parts.append(f'<h2>{html.escape(chapter)}</h2>\n')
            parts.append('<ul>\n')
            for highlight in book.highlights[chapter]:
                parts.append('<li>\n<blockquote>\n')
                # One paragraph per line of the highlight.
                parts.extend(
                    f'<p>{html.escape(line)}</p>\n'
                    for line in highlight.split('\n')
                )
                parts.append('</blockquote>\n</li>\n')
            parts.append('</ul>\n')
        parts.append(cls.FOOTER)
        return ''.join(parts)

    @classmethod
    def export_to_html(cls, book: 'Book') -> None:
        '''Export a book to HTML.

        The page is written to ``book.html_filename`` (overwriting any
        existing file): the title as a heading, then one section per
        chapter listing its highlights as block quotes.

        Params:
            book: The book to export
        Raises:
            OSError: If the output file cannot be written.
        '''
        # Render first so a failure while building the page does not leave
        # a truncated file behind.
        document = cls._render_html(book)
        with book.html_filename.open('w', encoding='utf8') as file:
            file.write(document)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment