Created
June 25, 2023 05:37
-
-
Save Techcable/0021d9ff97a787d2083fe87e3cd11fe6 to your computer and use it in GitHub Desktop.
Convert Firefox bookmarks JSON to raindrop.io CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Converts firefox's JSON bookmarks format to the CSV format raindrop.io expects | |
See here for docs on raindrop: https://help.raindrop.io/import/#csv | |
""" | |
from __future__ import annotations | |
from typing import ( | |
ClassVar, Iterable, NewType, Any, Iterator | |
) | |
from enum import Enum, nonmember as enum_nonmember | |
import dataclasses as dc | |
import json | |
import operator | |
import functools | |
import sys | |
import csv | |
from io import TextIOBase | |
from datetime import datetime | |
import click | |
from support.simple_serde import NameStyle, ParseError, parse_type | |
class EntryType(Enum):
    """MIME-style ``type`` strings used by Firefox's bookmarks JSON export."""
    PLACE = "text/x-moz-place"  # a bookmark (leaf) entry
    PLACE_CONTAINER = "text/x-moz-place-container"  # a folder of entries
    # NOTE: "SEPERATOR" misspelling kept for interface stability; the string
    # value is the correctly-spelled one Firefox actually emits.
    PLACE_SEPERATOR = "text/x-moz-place-separator"
def parse_moz_datetime(tp: type, value: Any) -> datetime:
    """Parse a Firefox PRTime value (integer microseconds since the Unix
    epoch) into a ``datetime``.

    Raises ParseError if ``value`` is not an integer.
    """
    assert issubclass(tp, datetime), tp
    if not isinstance(value, int):
        raise ParseError(f"Value should be an integer timestamp: {value!r}")
    whole_seconds, leftover_micros = divmod(value, 1_000_000)
    parsed = datetime.fromtimestamp(whole_seconds)
    return parsed.replace(microsecond=leftover_micros)
@dc.dataclass(kw_only=True)
class NormalizedEntry:
    """One node of Firefox's bookmarks JSON export.

    Deserialized by `support.simple_serde.parse_type`, which is driven by
    per-field ``metadata`` keys ("parser" for custom value parsing, "name"
    for a JSON key override) plus the class-level `_ignored_fields` and
    `_name_styling` knobs.
    """
    # Firefox's unique id; sole identity for __eq__/__hash__ below.
    guid: str
    title: str
    # Firefox emits tags as one comma-separated string; split into a list.
    tags: list[str] = dc.field(
        metadata={"parser": lambda _tp, val: val.split(',')},
        default_factory=list
    )
    # PRTime values: integer microseconds since the epoch (see parse_moz_datetime).
    date_added: datetime = dc.field(metadata={"parser": parse_moz_datetime})
    last_modified: datetime = dc.field(metadata={"parser": parse_moz_datetime})
    # Serialized under the JSON key "type"; see EntryType for the values.
    entry_type: EntryType = dc.field(metadata={"name": "type"})
    # Serialized under the JSON key "root"; presumably only set on the
    # well-known root folders (e.g. "placesRoot") — None elsewhere.
    root_id: str | None = dc.field(metadata={"name": "root"}, default=None)
    # Child entries for containers; None for leaf bookmarks/separators.
    # Excluded from repr() to keep tree output readable.
    children: list[NormalizedEntry] | None = dc.field(default=None, repr=False)
    uri: str | None = None
    keyword: str | None = None
    # JSON keys present in the export that the parser deliberately drops.
    _ignored_fields: ClassVar[set[str]] = {"index", "id", "typeCode", "iconUri", "postData", "charset"}
    # convert from camel case -> snake case
    _name_styling: ClassVar = (NameStyle.CAMEL_CASE, NameStyle.SNAKE_CASE)

    @staticmethod
    def parse(data: Any) -> NormalizedEntry:
        """Parse one JSON object (recursively including its children)."""
        return parse_type(NormalizedEntry, data)

    def print(
        self, target=sys.stdout, *,
        level: int, child_limit: int | None = None
    ):
        """Print an indented tree view of this entry to `target`.

        `level` is the starting indent (2 spaces per level); `child_limit`
        caps how many children are printed per container (None = all).
        """
        indent = ' ' * (2 * level)
        num_children = len(self.children) if self.children is not None else 0
        # Leaves show (a truncated prefix of) their URI; containers a count.
        data = self.uri or str(num_children)
        print(f"{indent}* {self.title} -> {data[:30]}", file=target)
        if self.children is not None:
            for child in self.children[:child_limit]:
                child.print(target, level=level + 1, child_limit=child_limit)

    def __hash__(self):
        # Identity is the guid alone, consistent with __eq__.
        return hash(self.guid)

    def __eq__(self, other):
        if isinstance(other, NormalizedEntry):
            return self.guid == other.guid
        else:
            return NotImplemented
class RootChildType(Enum):
    """The four fixed top-level children of the Firefox places root.

    Each member's enum value is its folder title; ``root_id`` carries the
    corresponding "root" field value from the JSON export.
    """
    MENU = ("menu", "bookmarksMenuFolder")
    TOOLBAR = ("toolbar", "toolbarFolder")
    UNFILED = ("unfiled", "unfiledBookmarksFolder")
    MOBILE = ("mobile", "mobileFolder")

    title: str
    root_id: str

    def __new__(cls, title, root_id):
        # Use the title alone as the canonical enum value so that
        # RootChildType("menu") etc. works; stash both tuple parts as attrs.
        member = object.__new__(cls)
        member._value_ = title
        member.title = title
        member.root_id = root_id
        return member

    @property
    def guid_prefix(self) -> str:
        """Expected prefix of this folder's guid in the export."""
        return f"{self.title}___"

    @property
    def human_name(self):
        """Human-readable folder name, as displayed by Firefox."""
        return {
            RootChildType.MENU: "Bookmarks Menu",
            RootChildType.TOOLBAR: "Bookmarks Toolbar",
            RootChildType.UNFILED: "Other Bookmarks",
            RootChildType.MOBILE: "Mobile Bookmarks",
        }[self]
@dc.dataclass(frozen=True) | |
class EntryCsvMeta: | |
entry: NormalizedEntry | |
_: dc.KW_ONLY | |
human_name: str | |
parent: EntryCsvMeta | None | |
def __post_init__(self): | |
if ('/' in self.human_name and | |
self.entry.entry_type != EntryType.PLACE): | |
# HACK | |
object.__setattr__( | |
self, 'human_name', | |
self.human_name.replace('/', '<slash>') | |
) | |
def parents(self) -> Iterator[EntryCsvMeta]: | |
parent = self.parent | |
while parent is not None: | |
yield parent | |
parent = parent.parent | |
@functools.cached_property | |
def full_human_name(self) -> str: | |
parts = [self.human_name] | |
for parent in self.parents(): | |
assert '/' not in parent.human_name, parent | |
parts.append(parent.human_name) | |
parts.reverse() | |
return '/'.join(parts) | |
def write_csv(root_node: NormalizedEntry, output_file: TextIOBase):
    """Flatten a parsed Firefox bookmarks tree into raindrop.io's CSV format.

    `root_node` must be the Firefox "placesRoot" container; its four
    well-known children (menu/toolbar/unfiled/mobile) become the top-level
    folders. Only PLACE (actual bookmark) entries produce rows; containers
    contribute only to the `folder` path. Rows are sorted by full folder path.

    Raises AssertionError if the tree does not match the expected export shape.
    """
    # Sanity-check that we really received the places root.
    assert root_node.guid.startswith("root___"), root_node.guid
    assert root_node.root_id == "placesRoot", root_node.root_id
    assert root_node.title == "", root_node.title
    assert root_node.children is not None
    assert root_node.entry_type == EntryType.PLACE_CONTAINER
    root_children: dict[RootChildType, NormalizedEntry] = {}
    resolved_meta: dict[NormalizedEntry, EntryCsvMeta] = {}
    for child in root_node.children:
        child_type: RootChildType = RootChildType(child.title)
        assert child_type not in root_children, (
            f"Duplicate types: {child.title!r}"
        )
        assert child.guid.startswith(child_type.guid_prefix), child.guid
        # BUGFIX: was `child.root_id == child.root_id` (a tautology);
        # compare against the expected well-known root id instead.
        assert child.root_id == child_type.root_id, child.root_id
        assert child.entry_type == EntryType.PLACE_CONTAINER
        assert child.children is not None
        root_children[child_type] = child
        resolved_meta[child] = EntryCsvMeta(
            entry=child,
            human_name=child_type.human_name,
            parent=None
        )
    # All four well-known roots must be present (exactly once each, since
    # duplicates were rejected above).
    assert set(root_children.keys()) == set(RootChildType), set(root_children.keys())
    # Depth-first walk, recording each entry's parent so the full folder
    # path can be reconstructed via EntryCsvMeta.full_human_name.
    stack: list[NormalizedEntry] = list(root_children.values())
    while stack:
        parent = stack.pop()
        resolved_parent = resolved_meta[parent]
        assert parent.children is not None
        for child in parent.children:
            assert child not in resolved_meta
            resolved_meta[child] = EntryCsvMeta(
                entry=child,
                human_name=child.title,
                parent=resolved_parent
            )
            if child.children:
                stack.append(child)
    # Keep only actual bookmarks, ordered by their folder path.
    resolved_meta_sorted = sorted(
        filter(
            lambda meta: meta.entry.entry_type == EntryType.PLACE,
            resolved_meta.values()
        ),
        key=operator.attrgetter('full_human_name')
    )
    # Column set per https://help.raindrop.io/import/#csv
    writer = csv.DictWriter(
        output_file,
        ('folder', 'title', 'url', 'description', 'tags', 'created'),
    )
    writer.writeheader()
    for meta in resolved_meta_sorted:
        assert meta.entry.entry_type == EntryType.PLACE
        assert meta.parent is not None, meta
        writer.writerow(dict(
            url=meta.entry.uri,
            folder=meta.parent.full_human_name,
            title=meta.entry.title,
            # description: <missing>
            tags=','.join(meta.entry.tags),
            created=meta.entry.date_added.isoformat()
        ))
@click.command('convert') | |
@click.argument('input_file', type=click.File()) | |
@click.argument('output_file', type=click.File(mode='wt')) | |
@click.option('output_format', '--format', type=click.Choice(('text', 'csv'))) | |
def convert(input_file, output_format, output_file): | |
raw_data = json.load(input_file) | |
result = NormalizedEntry.parse(raw_data) | |
match output_format: | |
case 'text': | |
result.print(level=0) | |
case 'csv': | |
write_csv(result, output_file) | |
case _: | |
raise AssertionError | |
# Script entry point: dispatch to the click CLI defined above.
if __name__ == "__main__":
    convert()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment