Last active
May 11, 2024 16:48
-
-
Save eigengrau/9d593c7906fde9fddbd703fdb62e2dbd to your computer and use it in GitHub Desktop.
Export Firefox bookmarks to org-mode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Read a Mozilla places JSON-structure from STDIN and create a directory | |
# hierarchy of org-mode files containing bookmarks. | |
# Copyright (c) 2014 Alexey Kutepov a.k.a. rexim | |
# Copyright (c) 2016 Sebastian Reuße | |
# Permission is hereby granted, free of charge, to any person | |
# obtaining a copy of this software and associated documentation files | |
# (the "Software"), to deal in the Software without restriction, | |
# including without limitation the rights to use, copy, modify, merge, | |
# publish, distribute, sublicense, and/or sell copies of the Software, | |
# and to permit persons to whom the Software is furnished to do so, | |
# subject to the following conditions: | |
# The above copyright notice and this permission notice shall be | |
# included in all copies or substantial portions of the Software. | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
# SOFTWARE. | |
import collections | |
import io | |
import itertools | |
import json | |
import os | |
import sys | |
import time | |
import click | |
def org_sanitize_text(string):
    """Sanitize a bookmark title or description.

    Line breaks are removed and square brackets, which would read as
    org-mode link syntax, are swapped for look-alike bracket characters.
    """
    # Third argument of maketrans: characters to delete outright.
    table = str.maketrans('[]', '⟦⟧', '\n\r')
    return string.translate(table)
def org_sanitize_tag(string):
    """Prepare a tag for use as an org-mode tag token.

    Spaces and hyphens are each replaced by an underscore.
    """
    underscored = str.maketrans(' -', '__')
    return string.translate(underscored)
def convert_timestamp(timestamp):
    """Convert a Mozilla places timestamp to an org-mode timestamp.

    timestamp: Microseconds since the Unix epoch, as an integer or as a
      string of digits.

    Returns a string of the form ``[YYYY-MM-DD HH:MM]`` in local time.
    """
    # Places timestamps count microseconds. Dividing (instead of splitting
    # the decimal string after its tenth digit) also gives the right answer
    # when the seconds part does not have exactly ten digits.
    epoch_seconds = int(timestamp) / 1000000
    return time.strftime('[%Y-%m-%d %H:%M]', time.localtime(epoch_seconds))
def make_leading_dirs(container):
    """Create the directories for every path component except the last.

    The final component is left alone because a bookmark folder might not
    have any directory children. Directories that already exist are fine.
    """
    # Zero or one component means there is no parent directory to create.
    if len(container) < 2:
        return
    parent_path = os.path.join(*container[:-1])
    os.makedirs(parent_path, exist_ok=True)
def render_properties(props):
    """Render a dictionary as the text of an org-mode properties drawer.

    Each item becomes a ``:key: value`` line between the ``:PROPERTIES:``
    and ``:END:`` markers; every line, including the last, ends in a newline.
    """
    drawer = [':PROPERTIES:']
    drawer.extend(':%s: %s' % (key, value) for key, value in props.items())
    drawer.append(':END:')
    return '\n'.join(drawer) + '\n'
def is_place(node):
    """Tell whether a node is a single bookmark (a Mozilla "place").

    A node without a 'type' key is simply not a place; it no longer raises
    KeyError, so callers can go on to report it as an unknown node type.
    """
    return node.get('type') == 'text/x-moz-place'
def is_container(node):
    """Tell whether a node is a bookmark folder (a places container).

    A node without a 'type' key is simply not a container; it no longer
    raises KeyError, so callers can go on to report it as an unknown node
    type.
    """
    return node.get('type') == 'text/x-moz-place-container'
def export_node(node, container=(), dir_depth=None, at=0):
    """Export one bookmark node, dispatching on its type.

    container: The path to the current node. This is the path inside the
      bookmarks structure, not a file-system path.
    dir_depth: After reaching this depth, bookmark containers are serialized
      as org-mode headlines instead of file-system directories.
    at: The current level of nesting for creating org-mode headings.

    Raises ValueError when the node is neither a container nor a place.
    """
    # Report progress on standard output.
    location = '/'.join(container)
    label = node.get('title', 'No title')
    print("> {path} [{title}]".format(path=location, title=label))

    if is_container(node):
        handler = export_container
    elif is_place(node):
        handler = export_place
    else:
        raise ValueError("Unknown node type: {}".format(node.get('type')))
    handler(node, container, dir_depth, at)
def export_container(node, container=(), dir_depth=None, at=0):
    """Export a bookmark folder and, recursively, everything inside it.

    node: The container node. Its 'children', if any, are exported in
      'index' order, with sub-containers before places.
    container: The path to the current node inside the bookmarks structure.
    dir_depth: Containers nested less deeply than this extend the output
      path; deeper ones are written as org-mode headlines. ``None`` means
      no limit.
    at: The current level of nesting for creating org-mode headings.
    """
    container = tuple(container)

    # Title may be missing, or present but empty.
    title = node.get('title') or 'bookmarks'
    # A slash in a title would otherwise introduce an extra path level.
    path_component = title.replace('/', '&')

    ordered = sorted(
        node.get('children', []),
        key=lambda child: child['index']
    )
    # Serialize container children first, to keep things orderly. Children
    # that are neither containers nor places are skipped.
    children = (
        [child for child in ordered if is_container(child)]
        + [child for child in ordered if is_place(child)]
    )

    # Decide how we map the current bookmark path between file-system paths
    # and the org-mode outline structure.
    if dir_depth is None or len(container) < dir_depth:
        next_container = container + (path_component,)
        next_at = 0
    else:
        # With a depth of zero there is no path yet; name the file after
        # this container rather than failing on an empty path.
        file_parts = container or (path_component,)
        make_leading_dirs(file_parts)
        outfile_name = '%s.org' % os.path.join(*file_parts)
        # Fixed encoding: sanitized titles contain non-ASCII characters, so
        # the locale default must not decide whether writing succeeds.
        with open(outfile_name, 'a', encoding='utf-8') as outfile:
            print('%s %s' % ((at + 1) * '*', title), file=outfile)
        next_container = file_parts
        next_at = at + 1

    # Recurse.
    for child in children:
        export_node(child, next_container, dir_depth, next_at)
def export_place(node, container=(), dir_depth=None, at=0):
    """Append one bookmark to the org-mode file of its container.

    node: The place node. 'uri', 'dateAdded' and 'lastModified' are
      required; 'title', 'tags' and a description annotation are optional.
    container: The path to the node inside the bookmarks structure. It is
      joined into the output file name, with '.org' appended.
    dir_depth: Not used here; accepted so that all exporters can be called
      with the same arguments.
    at: The current level of nesting; the headline gets ``at + 1`` stars.

    Raises ValueError if the node has no URI.
    """
    # Let's keep the properties drawers orderly by always using the same
    # ordering of property entries.
    properties = collections.OrderedDict()

    # Title/URI/link. `or ''` also covers keys that are present but null.
    title = properties['Title'] = org_sanitize_text(node.get('title') or '')
    uri = properties['URI'] = node.get('uri') or ''
    if not uri:
        raise ValueError("No URI for node: %s" % (node,))
    if title:
        link = '[[%s][%s]]' % (uri, title)
    else:
        link = '[[%s]]' % uri

    # Tags arrive as one comma-separated string.
    tags = node.get('tags') or ''
    if tags:
        tags = ' :%s:' % ':'.join(
            org_sanitize_tag(tag) for tag in tags.split(',')
        )

    properties['Added'] = convert_timestamp(node['dateAdded'])
    properties['Modified'] = convert_timestamp(node['lastModified'])

    # Bookmark description. Look at this node's own annotations, not those
    # of the tree root, and take the first matching one.
    for anno in node.get('annos') or ():
        if anno.get('name') == 'bookmarkProperties/description':
            properties['Description'] = org_sanitize_text(
                anno.get('value') or ''
            )
            break

    # Serialize the data and write to file.
    headline = '%s %s%s' % ((at + 1) * '*', link, tags)
    # An empty path cannot be joined; fall back to the name that untitled
    # containers get.
    file_parts = tuple(container) or ('bookmarks',)
    make_leading_dirs(file_parts)
    outfile_name = '%s.org' % os.path.join(*file_parts)
    # Fixed encoding: sanitized text contains non-ASCII characters, so the
    # locale default must not decide whether writing succeeds.
    with open(outfile_name, 'a', encoding='utf-8') as outfile:
        print(headline, file=outfile)
        print(render_properties(properties), file=outfile, end='')
# Command-line entry point: parse a places JSON tree from INFILE (standard
# input when omitted) and export it, starting from the root node.
@click.command()
@click.argument('infile', type=click.File('rb'), default=(lambda: sys.stdin))
@click.option('--depth', type=int, default=3,
              help="Create individual bookmark files up to this depth.")
def main(infile, depth):
    # Deliberately no docstring: click would show it as the command's help
    # text. The parsed tree is also bound to the module-level `bookmarks`.
    global bookmarks
    tree = json.load(infile)
    bookmarks = tree
    export_node(tree, dir_depth=depth)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from setuptools import setup | |
# Package the converter as a single-module distribution and register a
# `firefox-to-org` console script that calls firefox_to_org.main.
setup(
    name='firefox-to-org',
    version='0.1.0.0',
    description="Export Firefox bookmarks into org-mode files.",
    author="Sebastian Reuße",
    author_email='[email protected]',
    install_requires=['click'],
    py_modules=['firefox_to_org'],
    entry_points={
        'console_scripts': ['firefox-to-org = firefox_to_org:main'],
    },
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment