@JnBrymn
Created November 17, 2019 05:33
#!/usr/bin/env python3
"""
This processes TiddlyWiki dumps to stick them into a format that Bear can import.

Full steps:

* First, in your TiddlyWiki, under the Tools menu click "export all > Static HTML".
* Next, run this command:
  `process_tiddly_export --tiddler_dump_file=somewhere/tiddlers.html --output_directory=/tmp/some_empty_folder/ --extra_tags=any,tags,you,want`
  It will:
    * process the static HTML file into one file per tiddler
    * start each file with <h1>your tiddler title</h1>
    * then list any #tags on the original tiddler as well as any extra tags you supplied
    * process links like this:
        * a link to the web is not modified
        * a link to a tiddler that originally looked like `[[Some Link]]` is converted back from an `<a>` tag to `[[Some Link]]`
        * a link to a tiddler that originally looked like `[[details|Some Link]]` is converted to `details ([[Some Link]])`
    * otherwise preserve the HTML from the dump within the contents of the tiddler
* Finally, in Bear click File > Import Notes and select every file in the output_directory.
  (Make sure every file is highlighted; CMD+A doesn't seem to work.)
"""
import argparse
import os
from urllib.parse import unquote
from bs4 import BeautifulSoup


def correct_links(body):
    for link in body.find_all('a'):
        href = unquote(link.attrs.get('href', '').lstrip('#'))
        if not href:
            continue
        text = link.text
        if not text:
            continue
        if href.split('//')[0] in ('http:', 'https:'):
            # external web links are left untouched
            continue
        elif href == text:
            # originally a plain [[Some Link]] link
            link.replace_with(f'[[{text}]]')
        else:
            # originally a [[details|Some Link]] link
            link.replace_with(f'{text} ([[{href}]])')
    return body
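
# Illustrative examples of what correct_links produces (the exact markup in a
# TiddlyWiki static export may vary slightly):
#   <a href="#Some%20Link">Some Link</a>      ->  [[Some Link]]
#   <a href="#Some%20Link">details</a>        ->  details ([[Some Link]])
#   <a href="https://example.com">a site</a>  ->  left unmodified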


def tiddler_data(tiddler):
    data = {}
    data['title'] = tiddler.find(class_='tc-title').text.strip()
    data['tags'] = [tag.text.strip() for tag in tiddler.find_all(class_='tc-tag-label')]
    data['body'] = correct_links(tiddler.find(class_='tc-tiddler-body'))
    return data
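
# Rough shape of the dict returned by tiddler_data (values are illustrative):
#   {'title': 'My Tiddler',
#    'tags': ['project', 'someday'],
#    'body': <bs4 Tag for the tiddler body, with links rewritten>}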


def write_to_file(directory, tiddler, extra_tags=None):
    if extra_tags is None:
        extra_tags = []
    tags = extra_tags + tiddler['tags']
    tags = ' '.join([f'#{t}' for t in tags])
    html = f"""<!doctype html>
<html lang="en">
<head>
<title>{tiddler['title']}</title>
<meta name="description" content="The HTML5 Herald">
<meta name="author" content="SitePoint">
</head>
<body>
<h1>{tiddler['title']}</h1>
{tags}
<br/>
{tiddler['body']}
</body>
</html>
"""
    # slashes in a title would otherwise be treated as path separators
    file_name = tiddler['title'].replace('/', '.')
    with open(os.path.join(directory, f'{file_name}.html'), 'w') as f:
        f.write(html)
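
# Illustrative result: a tiddler titled "Reading List" tagged "books", run with
# --extra_tags=imported, would be written to <output_directory>/Reading List.html
# and its body would begin with:
#   <h1>Reading List</h1>
#   #imported #books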


def main(tiddler_dump_file, output_directory, extra_tags=None):
    print("importing tiddler dump")
    with open(tiddler_dump_file, 'r') as f:
        soup = BeautifulSoup(f, 'html.parser')

    print("extracting tiddler data")
    raw_tiddlers = soup.find_all(class_='tc-tiddler-frame')
    tiddlers = [tiddler_data(t) for t in raw_tiddlers]

    print("writing to files")
    for tiddler in tiddlers:
        write_to_file(output_directory, tiddler, extra_tags)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Export TiddlyWiki tiddlers as Bear-importable HTML files')
    parser.add_argument('--tiddler_dump_file', dest='tiddler_dump_file', action='store', help='static HTML export file from TiddlyWiki')
    parser.add_argument('--output_directory', dest='output_directory', action='store', help='empty directory to dump the per-tiddler html files into')
    parser.add_argument('--extra_tags', dest='extra_tags', action='store', help='extra tags to apply to all files (comma separated)')
    args = parser.parse_args()
    # --extra_tags is optional; only split it when it was actually provided
    extra_tags = args.extra_tags.split(',') if args.extra_tags else None
    main(args.tiddler_dump_file, args.output_directory, extra_tags)