Skip to content

Instantly share code, notes, and snippets.

@aphlysia
Last active April 25, 2021 07:59
Show Gist options
  • Save aphlysia/b2b8026e3c496c6ccb5499b841208577 to your computer and use it in GitHub Desktop.
Save aphlysia/b2b8026e3c496c6ccb5499b841208577 to your computer and use it in GitHub Desktop.
Dropbox paper から Joplin への変換
#coding: utf-8
'''
paper2joplin.py
Dropbox paper から Joplin への変換
'''
import re, sys, argparse, pathlib
class Re:
    """Regex helper that remembers the result of the most recent call.

    This lets a cascade of ``if``/``elif`` tests run a pattern in the
    condition and read the resulting match object inside the branch.
    """
    # from https://stackoverflow.com/questions/597476/how-to-concisely-cascade-through-multiple-regex-statements-in-python

    def __init__(self):
        # Match object from the latest match()/search() call; None when
        # nothing has been tried yet or the latest attempt failed.
        self.last_match = None

    def match(self, pattern, text, flags=0):
        """Run ``re.match`` and store the result in ``last_match``.

        Args:
            pattern: Regular expression (string or compiled pattern).
            text: String to test from its beginning.
            flags: Optional ``re`` flags; defaults to none.

        Returns:
            The match object, or None when the pattern does not match.
        """
        self.last_match = re.match(pattern, text, flags)
        return self.last_match

    def search(self, pattern, text, flags=0):
        """Run ``re.search`` and store the result in ``last_match``.

        Args:
            pattern: Regular expression (string or compiled pattern).
            text: String to scan.
            flags: Optional ``re`` flags; defaults to none.

        Returns:
            The match object, or None when the pattern is not found.
        """
        self.last_match = re.search(pattern, text, flags)
        return self.last_match
# Markdown image syntax: ![alt](target)
_IMAGE_RE = re.compile(r'[!]\[[^\]]*\]\([^\)]+\)')
# Markdown links that point at another Dropbox Paper document.
_PAPER_LINK_RE = re.compile(
    r'\[[^\]]*\]\(https://paper\.dropbox\.com/doc/[^\)]+\)')
# Candidate code line: a leading space followed by the code text.
# NOTE(review): the pattern requires a single leading space, exactly as it
# appears in the reviewed source; if the export indents code by more than
# one space the remainder is kept in the captured text - confirm the width.
_INDENTED_RE = re.compile(r'^ (.*)')
# Rules for a line consisting only of one $$...$$ formula, tried in order.
# Each entry is (pattern, output template); {0} is the formula body and
# {1}, when present, is the equation tag.
_MATH_RULES = (
    # $$body$$ (tag)  ->  $$\tag{tag}body$$
    (re.compile(r'^\s*\$\$([^$]*)\$\$\s*\(([^$]+)\)$'),
     '$$\\tag{{{1}}}{0}$$\n'),
    # $$body$$  ->  unchanged apart from surrounding whitespace
    (re.compile(r'^\s*\$\$([^$]*)\$\$\s*$'), '$${0}$$\n'),
    # $$body$$ followed by a comma (ASCII, ideographic or full-width):
    # the comma is moved inside the formula.
    (re.compile(r'^\s*\$\$([^$]*)\$\$[,、\uff0c]$'), '$${0},$$\n'),
    # $$body$$ followed by a period (ASCII, ideographic or full-width):
    # the period is moved inside the formula.
    (re.compile(r'^\s*\$\$([^$]*)\$\$[.。\uff0e]$'), '$${0}.$$\n'),
)


def _rewrite_display_math(line):
    """Return the rewritten form of a formula-only line, or None."""
    for pattern, template in _MATH_RULES:
        found = pattern.match(line)
        if found:
            return template.format(*found.groups())
    return None


def convert(filepath, out_dir):
    """Convert one Dropbox Paper Markdown export into ``out_dir``.

    The result is written to ``out_dir / filepath.name``.  Line by line:

    * zero-width spaces (U+200B) are removed;
    * a line holding a single ``$$...$$`` formula stays a ``$$`` formula
      (a trailing ``(tag)`` becomes ``\\tag{tag}``, trailing punctuation is
      moved inside the formula);
    * in every other non-code line ``$$`` is replaced by ``$``;
    * a run of space-prefixed lines that directly follows a blank line or
      a ``#`` heading is wrapped in a fenced code block, with the leading
      space removed.

    Images and links to other Paper documents are only counted; when any
    are found a summary for the file is printed to stderr.  A code line
    that contains ``$$`` produces a warning on stderr.

    Args:
        filepath: ``pathlib.Path`` of the Markdown file to read.
        out_dir: ``pathlib.Path`` of the output directory; created
            (including parents) when missing.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    n_images = 0
    n_links = 0
    in_code = False     # currently inside an emitted fenced block
    may_start = False   # previous line was blank/heading: a block may open

    # Both files are opened as UTF-8 and closed deterministically.
    with filepath.open(encoding='utf-8') as src, \
            (out_dir / filepath.name).open('w', encoding='utf-8') as out_file:
        for line in src:
            line = line.replace('\u200b', '')
            n_images += len(_IMAGE_RE.findall(line))
            n_links += len(_PAPER_LINK_RE.findall(line))

            # Blank line or heading: ends any open code block and allows
            # a new one to start on the following line.
            if line in ('', '\n') or line.startswith('#'):
                if in_code:
                    out_file.write('```\n')
                    in_code = False
                may_start = True
                out_file.write(line.replace('$$', '$'))
                continue

            # Formula rules take precedence over the code-line rule, so an
            # indented formula is still treated as a formula.
            math = _rewrite_display_math(line)
            indented = None if math is not None else _INDENTED_RE.match(line)

            if indented:
                if may_start:
                    in_code = True
                    may_start = False
                    out_file.write('```\n')
                if in_code:
                    out_file.write('{}\n'.format(indented.group(1)))
                    if '$$' in line:
                        print(f'warn: {line}', file=sys.stderr)
                else:
                    # Indented text that does not follow a blank line or
                    # heading is treated as ordinary text.
                    out_file.write(line.replace('$$', '$'))
                continue

            # Formula line or ordinary text: both end an open code block.
            if in_code:
                out_file.write('```\n')
                in_code = False
            may_start = False
            if math is not None:
                out_file.write(math)
            else:
                out_file.write(line.replace('$$', '$'))

        # Close a code block that runs to the end of the file so the
        # output never ends with an unterminated fence.
        if in_code:
            out_file.write('```\n')

    if n_images > 0 or n_links > 0:
        print(filepath, file=sys.stderr)
        if n_images > 0:
            print(f'{n_images} images', file=sys.stderr)
        if n_links > 0:
            print(f'{n_links} internal links', file=sys.stderr)
        print(file=sys.stderr)
def find(in_dir, out_dir):
    """Convert every ``.md`` file under ``in_dir`` into ``out_dir``.

    Sub-directories are processed recursively and mirrored under
    ``out_dir`` using the same names.

    Args:
        in_dir: ``pathlib.Path`` of the directory to scan.
        out_dir: ``pathlib.Path`` of the directory that receives the
            converted files for this level.
    """
    for item in in_dir.glob('*'):
        if item.is_dir():
            # A directory is only recursed into, even when its name ends
            # with '.md'; handing it to convert() would fail on open().
            find(item, out_dir / item.name)
        elif item.is_file() and item.name.endswith('.md'):
            convert(item, out_dir)
if __name__ == '__main__':
    # Command-line entry point: takes two positional arguments, the
    # directory to read and the directory to write, and converts the tree.
    cli = argparse.ArgumentParser()
    for positional in ('in_dir', 'out_dir'):
        cli.add_argument(positional)
    options = cli.parse_args()
    find(pathlib.Path(options.in_dir), pathlib.Path(options.out_dir))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment