JnBrymn · November 17, 2019 05:33
diff --git a/process_tiddly_export.py b/process_tiddly_export.py
 #!/usr/bin/env python3
 """
 This processes TiddlyWiki dumps to stick them into a format that Bear can import.

 Full steps:
 * First in your TiddlyWiki under the Tools menu click "export all>Static HTML".
 * Next, run this command `process_tiddly_export --tiddler_dump_file=somewhere/tiddlers.html  --output_directory=/tmp/some_empty_folder/ --extra_tags=any,tags,you,want` it will
  * process the static HTML file into one file per tiddler
  * each file will start with <h1>your tiddler title</h1>
  * next it will list any #tags on the original tiddler as well as any extra tags you supplied
  * links are processed like this:
    * a link to the web is not modified
    * a link to a tiddler that originally looked like `[[Some Link]]` is converted back from an `<a>` tag to `[[Some Link]]`
    * a link to a tiddler that originally looked like `[[details|Some Link]]` is converted to `details, ([[Some Link]])`
  * the HTML from the dump is otherwise preserved within the contents of the tiddler
 * Finally, in Bear click on File>Import Notes and select every file in the output_directory. (Make sure every file is highlighted. CMD+A doesn't seem to work.)
 """

 import argparse
 import os
 from urllib.parse import unquote

 from bs4 import BeautifulSoup


 def correct_links(body):
    """Rewrite tiddler-to-tiddler anchors under ``body`` as ``[[wiki links]]``.

    Every ``<a>`` element found under ``body`` is inspected:

    * anchors without an ``href`` or without text are left untouched;
    * anchors whose target starts with ``http://`` or ``https://`` (any
      letter case) are left untouched;
    * an anchor whose decoded target equals its text becomes ``[[text]]``;
    * any other anchor becomes ``text, ([[target]])``.

    Args:
        body: Parsed element exposing ``find_all``; each anchor it yields must
            expose ``attrs``, ``text`` and ``replace_with`` (presumably a
            BeautifulSoup tag -- confirm against the caller).

    Returns:
        The same ``body`` object, modified in place.
    """
    for link in body.find_all('a'):
        raw_href = link.attrs.get('href', '')
        # Remove only the single leading '#' (the fragment marker). lstrip('#')
        # would also eat '#' characters that belong to the target name itself.
        if raw_href.startswith('#'):
            raw_href = raw_href[1:]
        href = unquote(raw_href)
        text = link.text
        if not href or not text:
            continue
        # Web links stay as real anchors; compare the scheme case-insensitively.
        if href.split('//')[0].lower() in ('http:', 'https:'):
            continue
        if href == text:
            link.replace_with(f'[[{text}]]')
        else:
            # Link text differs from the target: keep both.
            link.replace_with(f'{text}, ([[{href}]])')
    return body


 def tiddler_data(tiddler):
    """Collect the title, tag names and link-corrected body of one tiddler.

    Args:
        tiddler: Element for a single tiddler, queried by CSS class through
            ``find`` / ``find_all`` (presumably a BeautifulSoup tag -- confirm
            against the caller).

    Returns:
        dict with three keys:
        'title' -- stripped text of the ``tc-title`` element,
        'tags'  -- stripped text of every ``tc-tag-label`` element,
        'body'  -- the ``tc-tiddler-body`` element after ``correct_links``.
    """
    title_node = tiddler.find(class_='tc-title')
    tag_nodes = tiddler.find_all(class_='tc-tag-label')
    body_node = tiddler.find(class_='tc-tiddler-body')
    return {
        'title': title_node.text.strip(),
        'tags': [node.text.strip() for node in tag_nodes],
        'body': correct_links(body_node),
    }


 def write_to_file(directory, tiddler, extra_tags=None):
    """Write one tiddler to ``<directory>/<title>.html`` as a standalone page.

    The page holds the title as an ``<h1>``, one line of ``#tag`` markers
    (``extra_tags`` first, then the tiddler's own tags) and the tiddler body.

    Args:
        directory: Directory that receives the file; it must already exist.
        tiddler: Mapping with 'title' (str), 'tags' (list of str) and 'body'
            (any object whose ``str()`` is the body HTML).
        extra_tags: Optional iterable of additional tag names; ``None`` means
            no extra tags.

    Raises:
        OSError: If the output file cannot be created or written.
    """
    from html import escape  # local import keeps this block self-contained

    all_tags = list(extra_tags or []) + list(tiddler['tags'])
    # Skip blank entries (e.g. from splitting "a,,b") so no bare "#" is emitted.
    tags = ' '.join(
        f'#{escape(t.strip(), quote=False)}' for t in all_tags if t.strip()
    )
    # The title is plain text; escape it so '&' or '<' cannot corrupt the markup.
    title = escape(tiddler['title'], quote=False)

    page = f"""<!doctype html>
 <html lang="en">
 <head>
  <meta charset="utf-8">
  <title>{title}</title>
  <meta name="description" content="The HTML5 Herald">
  <meta name="author" content="SitePoint">
 </head>
 <body>
  <h1>{title}</h1>
  {tags}
  <br/>
    {tiddler['body']}
 </body>
 </html>
 """
    # '/' would be read as a path separator, so it is replaced in the file name.
    file_name = tiddler['title'].replace('/', '.')
    # Explicit UTF-8 so non-ASCII notes do not depend on the platform default.
    with open(os.path.join(directory, f'{file_name}.html'), 'w', encoding='utf-8') as f:
        f.write(page)


 def main(tiddler_dump_file, output_directory, extra_tags=None):
    """Split a TiddlyWiki static HTML dump into one HTML file per tiddler.

    Args:
        tiddler_dump_file: Path of the exported HTML file to read.
        output_directory: Directory that receives one ``.html`` file per
            tiddler; it is created if it does not exist yet.
        extra_tags: Optional list of tag names added to every written file.
    """
    print("importing tiddler dump")
    # Read bytes and let BeautifulSoup detect the document encoding rather than
    # decoding with the platform default. Naming the parser explicitly keeps
    # the result independent of which optional parsers are installed.
    with open(tiddler_dump_file, 'rb') as f:
        soup = BeautifulSoup(f, 'html.parser')
    print("extracting tiddler data")
    raw_tiddlers = soup.find_all(class_='tc-tiddler-frame')
    tiddlers = [tiddler_data(t) for t in raw_tiddlers]
    print("writing to files")

    os.makedirs(output_directory, exist_ok=True)
    for tiddler in tiddlers:
        write_to_file(output_directory, tiddler, extra_tags)


 if __name__ == '__main__':
    # Command-line entry point: parse the three options and run the export.
    parser = argparse.ArgumentParser(description='Export Tiddly Wiki')
    # Both paths are mandatory; without them main() would fail on a None path.
    parser.add_argument('--tiddler_dump_file', dest='tiddler_dump_file', action='store', required=True, help='export file from tiddlywiki')
    parser.add_argument('--output_directory', dest='output_directory', action='store', required=True, help='empty directory to dump html files')
    # Default to '' so that omitting --extra_tags no longer calls .split on None.
    parser.add_argument('--extra_tags', dest='extra_tags', action='store', default='', help='extra tags to apply to all files (comma separated)')

    args = parser.parse_args()

    # Drop blank entries so "a,,b" or an omitted option yields no empty tags.
    extra_tags = [tag.strip() for tag in args.extra_tags.split(',') if tag.strip()]
    main(args.tiddler_dump_file, args.output_directory, extra_tags)
	#!/usr/bin/env python3
	"""
	This processes TiddlyWiki dumps to stick them into a format that Bear can import.

	Full steps:
	* First in your TiddlyWiki under the Tools menu click "export all>Static HTML".
	* Next, run this command `process_tiddly_export --tiddler_dump_file=somewhere/tiddlers.html --output_directory=/tmp/some_empty_folder/ --extra_tags=any,tags,you,want` it will
	* process the static HTML file into one file per tiddler
	* each file will start with <h1>your tiddler title</h1>
	* next it will list any #tags on the original tiddler as well as any extra tags you supplied
	* links are processed like this:
	* a link to the web is not modified
	* a link to a tiddler that originally looked like `[[Some Link]]` is converted back from an `<a>` tag to `[[Some Link]]`
	* a link to a tiddler that originally looked like `[[details|Some Link]]` is converted to `details, ([[Some Link]])`
	* the HTML from the dump is otherwise preserved within the contents of the tiddler
	* Finally, in Bear click on File>Import Notes and select every file in the output_directory. (Make sure every file is highlighted. CMD+A doesn't seem to work.)
	"""

	import argparse
	import os
	from urllib.parse import unquote

	from bs4 import BeautifulSoup


	def correct_links(body):
	    """Rewrite tiddler-to-tiddler anchors under ``body`` as ``[[wiki links]]``.

	    Every ``<a>`` element found under ``body`` is inspected:

	    * anchors without an ``href`` or without text are left untouched;
	    * anchors whose target starts with ``http://`` or ``https://`` (any
	      letter case) are left untouched;
	    * an anchor whose decoded target equals its text becomes ``[[text]]``;
	    * any other anchor becomes ``text, ([[target]])``.

	    Args:
	        body: Parsed element exposing ``find_all``; each anchor it yields must
	            expose ``attrs``, ``text`` and ``replace_with`` (presumably a
	            BeautifulSoup tag -- confirm against the caller).

	    Returns:
	        The same ``body`` object, modified in place.
	    """
	    for link in body.find_all('a'):
	        raw_href = link.attrs.get('href', '')
	        # Remove only the single leading '#' (the fragment marker). lstrip('#')
	        # would also eat '#' characters that belong to the target name itself.
	        if raw_href.startswith('#'):
	            raw_href = raw_href[1:]
	        href = unquote(raw_href)
	        text = link.text
	        if not href or not text:
	            continue
	        # Web links stay as real anchors; compare the scheme case-insensitively.
	        if href.split('//')[0].lower() in ('http:', 'https:'):
	            continue
	        if href == text:
	            link.replace_with(f'[[{text}]]')
	        else:
	            # Link text differs from the target: keep both.
	            link.replace_with(f'{text}, ([[{href}]])')
	    return body


	def tiddler_data(tiddler):
	    """Collect the title, tag names and link-corrected body of one tiddler.

	    Args:
	        tiddler: Element for a single tiddler, queried by CSS class through
	            ``find`` / ``find_all`` (presumably a BeautifulSoup tag -- confirm
	            against the caller).

	    Returns:
	        dict with three keys:
	        'title' -- stripped text of the ``tc-title`` element,
	        'tags'  -- stripped text of every ``tc-tag-label`` element,
	        'body'  -- the ``tc-tiddler-body`` element after ``correct_links``.
	    """
	    data = {}
	    data['title'] = tiddler.find(class_='tc-title').text.strip()
	    data['tags'] = [tag.text.strip() for tag in tiddler.find_all(class_='tc-tag-label')]
	    data['body'] = correct_links(tiddler.find(class_='tc-tiddler-body'))
	    return data


	def write_to_file(directory, tiddler, extra_tags=None):
	    """Write one tiddler to ``<directory>/<title>.html`` as a standalone page.

	    The page holds the title as an ``<h1>``, one line of ``#tag`` markers
	    (``extra_tags`` first, then the tiddler's own tags) and the tiddler body.

	    Args:
	        directory: Directory that receives the file; it must already exist.
	        tiddler: Mapping with 'title' (str), 'tags' (list of str) and 'body'
	            (any object whose ``str()`` is the body HTML).
	        extra_tags: Optional iterable of additional tag names; ``None`` means
	            no extra tags.

	    Raises:
	        OSError: If the output file cannot be created or written.
	    """
	    from html import escape  # local import keeps this block self-contained

	    all_tags = list(extra_tags or []) + list(tiddler['tags'])
	    # Skip blank entries (e.g. from splitting "a,,b") so no bare "#" is emitted.
	    tags = ' '.join(
	        f'#{escape(t.strip(), quote=False)}' for t in all_tags if t.strip()
	    )
	    # The title is plain text; escape it so '&' or '<' cannot corrupt the markup.
	    title = escape(tiddler['title'], quote=False)

	    page = f"""<!doctype html>
	<html lang="en">
	<head>
	<meta charset="utf-8">
	<title>{title}</title>
	<meta name="description" content="The HTML5 Herald">
	<meta name="author" content="SitePoint">
	</head>
	<body>
	<h1>{title}</h1>
	{tags}
	<br/>
	{tiddler['body']}
	</body>
	</html>
	"""
	    # '/' would be read as a path separator, so it is replaced in the file name.
	    file_name = tiddler['title'].replace('/', '.')
	    # Explicit UTF-8 so non-ASCII notes do not depend on the platform default.
	    with open(os.path.join(directory, f'{file_name}.html'), 'w', encoding='utf-8') as f:
	        f.write(page)


	def main(tiddler_dump_file, output_directory, extra_tags=None):
	    """Split a TiddlyWiki static HTML dump into one HTML file per tiddler.

	    Args:
	        tiddler_dump_file: Path of the exported HTML file to read.
	        output_directory: Directory that receives one ``.html`` file per
	            tiddler; it is created if it does not exist yet.
	        extra_tags: Optional list of tag names added to every written file.
	    """
	    print("importing tiddler dump")
	    # Read bytes and let BeautifulSoup detect the document encoding rather than
	    # decoding with the platform default. Naming the parser explicitly keeps
	    # the result independent of which optional parsers are installed.
	    with open(tiddler_dump_file, 'rb') as f:
	        soup = BeautifulSoup(f, 'html.parser')
	    print("extracting tiddler data")
	    raw_tiddlers = soup.find_all(class_='tc-tiddler-frame')
	    tiddlers = [tiddler_data(t) for t in raw_tiddlers]
	    print("writing to files")

	    os.makedirs(output_directory, exist_ok=True)
	    for tiddler in tiddlers:
	        write_to_file(output_directory, tiddler, extra_tags)


	if __name__ == '__main__':
	    # Command-line entry point: parse the three options and run the export.
	    parser = argparse.ArgumentParser(description='Export Tiddly Wiki')
	    # Both paths are mandatory; without them main() would fail on a None path.
	    parser.add_argument('--tiddler_dump_file', dest='tiddler_dump_file', action='store', required=True, help='export file from tiddlywiki')
	    parser.add_argument('--output_directory', dest='output_directory', action='store', required=True, help='empty directory to dump html files')
	    # Default to '' so that omitting --extra_tags no longer calls .split on None.
	    parser.add_argument('--extra_tags', dest='extra_tags', action='store', default='', help='extra tags to apply to all files (comma separated)')

	    args = parser.parse_args()

	    # Drop blank entries so "a,,b" or an omitted option yields no empty tags.
	    extra_tags = [tag.strip() for tag in args.extra_tags.split(',') if tag.strip()]
	    main(args.tiddler_dump_file, args.output_directory, extra_tags)