SebDeclercq · July 10, 2020 13:42
diff --git a/oreilly-export-csv2html.py b/oreilly-export-csv2html.py
 from collections import defaultdict
 from pathlib import Path
 from typing import ClassVar, DefaultDict, List, Tuple
 import csv
 import re


 class Book:
    '''A book together with the highlights exported for it.

    Holds the subset of the metadata available in the
    safari-annotations-export.csv file that the HTML export needs.

    Attrs:
        title: The title, compared verbatim with the CSV's "Book Title"
            column when the highlights are collected
        slug: The slug to use for the filename (for instance); derived
            from the title when not supplied
        html_filename: The filename to use for the HTML export,
            i.e. "<slug>.html"
        chapters: The **ordered** list of the book's chapters
        highlights: A list of the highlights, chapter by chapter
    '''

    def __init__(self, *, title: str, slug: str = '') -> None:
        '''Create a book that has no chapters or highlights yet.

        Params:
            title: The book title.
            slug: Optional slug; when empty, one is built from the title.
        '''
        self.title: str = title
        self.slug: str = slug
        self.chapters: List[str] = []
        self.highlights: DefaultDict[str, List[str]] = defaultdict(list)
        if not self.slug:
            # Collapse every run of non-word characters into one hyphen.
            # The former pattern, [^\W], matched the *word* characters
            # instead and turned a title into a string of hyphens.
            self.slug = re.sub(r'\W+', '-', self.title).strip('-')
        self.html_filename: Path = Path(f'{self.slug}.html')

    @property
    def to_html(self) -> None:
        '''Export the highlights to HTML.

        Kept as a property for backward compatibility: merely reading
        ``book.to_html`` writes the file through
        OReillyCSV2HTML.export_to_html and evaluates to None.
        '''
        OReillyCSV2HTML.export_to_html(self)
            
            
 class OReillyCSV2HTML:
    '''safari-annotations-export.csv file parser and HTML exporter

    Attrs:
        OREILLY_EXPORT_FILENAME: Path of the CSV export that is read
        HEADER: Opening part of the HTML page; the HTML_TITLE
            placeholder is replaced with the book title on export
        FOOTER: Closing part of the HTML page
    '''

    OREILLY_EXPORT_FILENAME: ClassVar[Path] = Path('safari-annotations-export.csv')

    # NOTE(review): the KaTeX stylesheet URL had been mangled into
    # "[email protected]" by e-mail obfuscation. katex@0.10.2 is presumably
    # the release the integrity hash below was issued for -- confirm.
    HEADER: ClassVar[str] = '''<!DOCTYPE html>
    <html>
    <head>
        <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
        <title>HTML_TITLE</title>
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.10.2/dist/katex.min.css" integrity="sha384-yFRtMMDnQtDRO8rLpMIKrtPCD5jdktao2TV19YiZYWMDkUR5GQZR/NOVTdquEx1j" crossorigin="anonymous">
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css">
        <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css">
        <link href="https://cdn.jsdelivr.net/npm/katex-copytex@latest/dist/katex-copytex.min.css" rel="stylesheet" type="text/css">
        <style>
 .task-list-item { list-style-type: none; } .task-list-item-checkbox { margin-left: -20px; vertical-align: middle; }
 </style>
        <style>
            body {
                font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', 'Ubuntu', 'Droid Sans', sans-serif;
                font-size: 14px;
                line-height: 1.6;
            }
        </style>
        
        <script src="https://cdn.jsdelivr.net/npm/katex-copytex@latest/dist/katex-copytex.min.js"></script>
    </head>
    <body>'''

    FOOTER: ClassVar[str] = '''
        </body>
        </html>'''

    @classmethod
    def get_highlights(cls, book: Book) -> Book:
        '''Parse the csv file in order to collect all the highlights
        for a specific book.

        Only the rows whose "Book Title" column equals ``book.title``
        are kept. A chapter is inserted at the front of
        ``book.chapters`` the first time it is met, so the chapters end
        up in reverse order of first appearance in the file; the
        highlights of one chapter keep the order of the file.

        Params:
            book: The book to search for. It is modified in place.

        Returns:
            The same book, fed w/ its chapters & highlights.

        Raises:
            OSError: If the export file cannot be opened.
            KeyError: If an expected column is missing from the file.
        '''
        with cls.OREILLY_EXPORT_FILENAME.open(encoding='utf8') as file:
            for record in csv.DictReader(file):
                if record['Book Title'] != book.title:
                    continue
                chapter: str = record['Chapter Title']
                if chapter not in book.chapters:
                    # presumably the export lists the latest chapters
                    # first, hence the reversal -- TODO confirm
                    book.chapters.insert(0, chapter)
                book.highlights[chapter].append(record['Highlight'])
        return book

    @classmethod
    def export_to_html(cls, book: Book) -> None:
        '''Export a book to HTML.

        Writes HEADER, the title, then one ``<h2>`` + ``<ul>`` per
        chapter with every highlight as a block quote (one ``<p>`` per
        line of the highlight), then FOOTER, into
        ``book.html_filename``.

        Params:
            book: The book to export
        '''
        # Function-scope import so that no file-level import is needed.
        from html import escape

        # All the text comes from the CSV export: escape it so that a
        # "<" or "&" in a title or a highlight cannot break the markup.
        title = escape(book.title, quote=False)
        with book.html_filename.open('w', encoding='utf8') as file:
            file.write(cls.HEADER.replace('HTML_TITLE', title))
            file.write(f'<h1>{title}</h1>\n')
            for chapter in book.chapters:
                file.write(f'<h2>{escape(chapter, quote=False)}</h2>\n')
                file.write('<ul>\n')
                for highlight in book.highlights[chapter]:
                    file.write('<li>\n<blockquote>\n')
                    file.writelines(
                        f'<p>{escape(line, quote=False)}</p>\n'
                        for line in highlight.split('\n')
                    )
                    file.write('</blockquote>\n</li>\n')
                file.write('</ul>\n')
            file.write(cls.FOOTER)
	from collections import defaultdict
	from pathlib import Path
	from typing import ClassVar, DefaultDict, List, Tuple
	import csv
	import re


	class Book:
	    '''A book together with the highlights exported for it.

	    Holds the subset of the metadata available in the
	    safari-annotations-export.csv file that the HTML export needs.

	    Attrs:
	        title: The title, compared verbatim with the CSV's "Book Title"
	            column when the highlights are collected
	        slug: The slug to use for the filename (for instance); derived
	            from the title when not supplied
	        html_filename: The filename to use for the HTML export,
	            i.e. "<slug>.html"
	        chapters: The ordered list of the book's chapters
	        highlights: A list of the highlights, chapter by chapter
	    '''

	    def __init__(self, *, title: str, slug: str = '') -> None:
	        '''Create a book that has no chapters or highlights yet.

	        Params:
	            title: The book title.
	            slug: Optional slug; when empty, one is built from the title.
	        '''
	        self.title: str = title
	        self.slug: str = slug
	        self.chapters: List[str] = []
	        self.highlights: DefaultDict[str, List[str]] = defaultdict(list)
	        if not self.slug:
	            # Collapse every run of non-word characters into one hyphen.
	            # The former pattern, [^\W], matched the *word* characters
	            # instead and turned a title into a string of hyphens.
	            self.slug = re.sub(r'\W+', '-', self.title).strip('-')
	        self.html_filename: Path = Path(f'{self.slug}.html')

	    @property
	    def to_html(self) -> None:
	        '''Export the highlights to HTML.

	        Kept as a property for backward compatibility: merely reading
	        ``book.to_html`` writes the file through
	        OReillyCSV2HTML.export_to_html and evaluates to None.
	        '''
	        OReillyCSV2HTML.export_to_html(self)


	class OReillyCSV2HTML:
	    '''safari-annotations-export.csv file parser and HTML exporter

	    Attrs:
	        OREILLY_EXPORT_FILENAME: Path of the CSV export that is read
	        HEADER: Opening part of the HTML page; the HTML_TITLE
	            placeholder is replaced with the book title on export
	        FOOTER: Closing part of the HTML page
	    '''

	    OREILLY_EXPORT_FILENAME: ClassVar[Path] = Path('safari-annotations-export.csv')

	    # NOTE(review): the KaTeX stylesheet URL had been mangled into
	    # "[email protected]" by e-mail obfuscation. katex@0.10.2 is presumably
	    # the release the integrity hash below was issued for -- confirm.
	    HEADER: ClassVar[str] = '''<!DOCTYPE html>
	<html>
	<head>
	<meta http-equiv="Content-type" content="text/html;charset=UTF-8">
	<title>HTML_TITLE</title>
	<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.10.2/dist/katex.min.css" integrity="sha384-yFRtMMDnQtDRO8rLpMIKrtPCD5jdktao2TV19YiZYWMDkUR5GQZR/NOVTdquEx1j" crossorigin="anonymous">
	<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/markdown.css">
	<link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/Microsoft/vscode/extensions/markdown-language-features/media/highlight.css">
	<link href="https://cdn.jsdelivr.net/npm/katex-copytex@latest/dist/katex-copytex.min.css" rel="stylesheet" type="text/css">
	<style>
	.task-list-item { list-style-type: none; } .task-list-item-checkbox { margin-left: -20px; vertical-align: middle; }
	</style>
	<style>
	body {
	font-family: -apple-system, BlinkMacSystemFont, 'Segoe WPC', 'Segoe UI', 'Ubuntu', 'Droid Sans', sans-serif;
	font-size: 14px;
	line-height: 1.6;
	}
	</style>

	<script src="https://cdn.jsdelivr.net/npm/katex-copytex@latest/dist/katex-copytex.min.js"></script>
	</head>
	<body>'''

	    FOOTER: ClassVar[str] = '''
	</body>
	</html>'''

	    @classmethod
	    def get_highlights(cls, book: Book) -> Book:
	        '''Parse the csv file in order to collect all the highlights
	        for a specific book.

	        Only the rows whose "Book Title" column equals ``book.title``
	        are kept. A chapter is inserted at the front of
	        ``book.chapters`` the first time it is met, so the chapters end
	        up in reverse order of first appearance in the file; the
	        highlights of one chapter keep the order of the file.

	        Params:
	            book: The book to search for. It is modified in place.

	        Returns:
	            The same book, fed w/ its chapters & highlights.

	        Raises:
	            OSError: If the export file cannot be opened.
	            KeyError: If an expected column is missing from the file.
	        '''
	        with cls.OREILLY_EXPORT_FILENAME.open(encoding='utf8') as file:
	            for record in csv.DictReader(file):
	                if record['Book Title'] != book.title:
	                    continue
	                chapter: str = record['Chapter Title']
	                if chapter not in book.chapters:
	                    # presumably the export lists the latest chapters
	                    # first, hence the reversal -- TODO confirm
	                    book.chapters.insert(0, chapter)
	                book.highlights[chapter].append(record['Highlight'])
	        return book

	    @classmethod
	    def export_to_html(cls, book: Book) -> None:
	        '''Export a book to HTML.

	        Writes HEADER, the title, then one ``<h2>`` + ``<ul>`` per
	        chapter with every highlight as a block quote (one ``<p>`` per
	        line of the highlight), then FOOTER, into
	        ``book.html_filename``.

	        Params:
	            book: The book to export
	        '''
	        # Function-scope import so that no file-level import is needed.
	        from html import escape

	        # All the text comes from the CSV export: escape it so that a
	        # "<" or "&" in a title or a highlight cannot break the markup.
	        title = escape(book.title, quote=False)
	        with book.html_filename.open('w', encoding='utf8') as file:
	            file.write(cls.HEADER.replace('HTML_TITLE', title))
	            file.write(f'<h1>{title}</h1>\n')
	            for chapter in book.chapters:
	                file.write(f'<h2>{escape(chapter, quote=False)}</h2>\n')
	                file.write('<ul>\n')
	                for highlight in book.highlights[chapter]:
	                    file.write('<li>\n<blockquote>\n')
	                    file.writelines(
	                        f'<p>{escape(line, quote=False)}</p>\n'
	                        for line in highlight.split('\n')
	                    )
	                    file.write('</blockquote>\n</li>\n')
	                file.write('</ul>\n')
	            file.write(cls.FOOTER)