Skip to content

Instantly share code, notes, and snippets.

@pablomon
Forked from kskmori/trac2md.py
Last active December 7, 2023 09:45
Show Gist options
  • Save pablomon/c8e1b42a0f75559f0e7f43b05fde1338 to your computer and use it in GitHub Desktop.
Save pablomon/c8e1b42a0f75559f0e7f43b05fde1338 to your computer and use it in GitHub Desktop.
Trac Wiki to Markdown converter
#!/usr/bin/env python
#
# Trac Wiki to Markdown converter
#
# Copyright(c) 2019 Keisuke MORI ([email protected])
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
### for Python 2/3 compatible code in Python 3 style
# https://stackoverflow.com/questions/5868506/backwards-compatible-input-calls-in-python
## print
from __future__ import print_function
## raw_input/input
# On Python 2 make input() return the typed text unevaluated, like Python 3.
# The name is probed directly instead of via hasattr(__builtins__, ...):
# __builtins__ is a module in __main__ but a plain dict in imported modules,
# so the hasattr() test silently fails when this file is imported.
try:
    input = raw_input  # noqa: F821 - only defined on Python 2
except NameError:
    # Python 3: the built-in input() already behaves as wanted
    pass
## urllib
# 'urllib is the hardest module to use from Python 2/3 compatible code.'
# - from https://python-future.org/compatible_idioms.html
try:
    # Python 3
    from urllib.request import urlopen, Request
    from urllib.error import HTTPError
except ImportError:
    # Python 2.7
    from urllib2 import urlopen, Request, HTTPError
import sys
import re
import ssl
import getpass
import base64
import io
# True while the converter is inside a multi-line {{{ ... }}} block:
# set by do_tracwiki() on an opening '{{{' line, cleared by do_raw() on '}}}'.
raw = False
# True while consecutive table rows (lines containing '||') are being
# converted; do_tracwiki() uses it to emit the header separator only once.
table = False
# Prefix that do_raw() / do_tracwiki() put in front of code-block lines.
raw_indent = ' '
# Nesting state of the list currently being converted.
# list_indent is a stack holding the source indentation width of every open
# list level; the -1 sentinel at the bottom is smaller than any real width,
# so the stack never becomes empty. list_level is the zero-based depth of the
# innermost open level (-1 when no level is open).
list_level = -1
list_indent = [-1]


def list_adjust_indent(m):
    """Return the Markdown bullet prefix for a matched Trac list item.

    Used as a replacement callback for ``re.sub``. ``m.group(1)`` must hold
    the leading whitespace of the list item; its length is compared with the
    indentation of the levels seen so far to decide whether the item stays
    on the current level, opens a deeper one, or closes deeper ones.

    The module-level ``list_level`` / ``list_indent`` state is updated as a
    side effect, so items must be fed in document order.

    Returns the normalised indentation followed by ``'* '``.
    """
    global list_level
    num_spaces = len(m.group(1))
    # Dedent: close every level that is indented deeper than this item.
    while num_spaces < list_indent[-1]:
        list_level -= 1
        list_indent.pop()
    # Indent: this item is deeper than the innermost open level.
    if num_spaces > list_indent[-1]:
        list_level += 1
        list_indent.append(num_spaces)
    return (' ' * (1 + list_level * 2)) + '* '
def do_raw(line):
    """Convert one line that lies inside a multi-line ``{{{ ... }}}`` block.

    A line consisting solely of ``}}}`` ends the block: it becomes a
    Markdown code fence and the module-level ``raw`` flag is cleared so the
    following lines are converted as wiki markup again. Any other line is
    passed through verbatim.

    Returns the line prefixed with the module-level ``raw_indent``.
    """
    global raw
    line, count = re.subn(r'^}}}$', r'```', line)
    if count > 0:
        raw = False
    return raw_indent + line
def _heading_to_markdown(m):
    """Turn a matched Trac heading into an ATX heading of the same depth."""
    return '#' * len(m.group(1)) + ' ' + m.group(2)


def _image_to_markdown(m):
    """Turn a matched ``[[Image(target)]]`` macro into a Markdown image.

    Colons in the target become path separators and the target is
    lower-cased; the result is used both as alt text and as absolute path.
    """
    target = m.group(1).replace(':', '/').lower()
    return '![' + target + '](/' + target + ')'


def _collapse_header_cells(header):
    """Reduce each cell of a generated table separator row to a single '-'.

    The separator is derived from the first table row, so a multi-word cell
    yields something like ``' --- -- '``; a cell made only of hyphens and
    spaces is collapsed to ``'-'``. Text before the first pipe (indentation)
    is left untouched.
    """
    cells = header.split('|')
    collapsed = ['-' if re.match(r'^[\- ]+$', cell) else cell
                 for cell in cells[1:]]
    return '|'.join(cells[:1] + collapsed)


def do_tracwiki(line):
    """Convert one line of Trac wiki markup (outside a code block) to Markdown.

    ``line`` is a single line without its trailing newline. Handles
    headings, list items, bold/italic, inline code, external and section
    links, section anchors, tables and a few macros (BR, PageOutline, Image,
    TranslatedPages).

    Side effects on module-level state: ``table`` tracks whether the previous
    line was a table row, ``raw`` is set when the line opens a multi-line
    ``{{{`` block, and list nesting is tracked by ``list_adjust_indent``.

    Returns the converted line; the first row of a table is returned as
    several lines joined with ``'\\n'`` (blank line, row, separator row).
    """
    global raw, table
    # section: '= T =' ... '====== T ======' -> '# T' ... '###### T'
    # (the optional closing run must have the same length as the opening one)
    line = re.sub(r'^(={1,6})\s+(.*?)(\s+\1)?$', _heading_to_markdown, line)
    # list ('*' and '-' markers are treated alike)
    line = re.sub(r'^(\s*)[*\-] ', list_adjust_indent, line)
    # text: '''bold''' -> **bold**, ''italic'' -> _italic_
    line = re.sub(r"'''(.*?)'''", r'**\1**', line)
    line = re.sub(r"''(.*?)''", r'_\1_', line)
    # code
    line = re.sub(r'{{{(.*?)}}}', r'`\1`', line)
    # link
    line = re.sub(r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)
    # section link
    line = re.sub(r'\[(#[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)
    # section anchor
    line = re.sub(r'\s#([^\s\[\]]+)', r' <a id="\1"></a>', line)
    # table
    line, count = re.subn(r'\|\|', r'|', line)
    if count == 0:
        table = False
    elif not table:
        # first row of a table: Markdown needs a separator row after it,
        # and a blank line before the table
        header = _collapse_header_cells(re.sub(r'[^\|\s]', r'-', line))
        line = '\n' + line + '\n' + header
        table = True
    # macro
    line = re.sub(r'(?i)\[\[BR\]\]', r'<br />', line)
    # TOC is not supported - use other tools
    line = re.sub(r'\[\[PageOutline.*\]\]', r'<!--- TOC --->\n<!--- /TOC --->', line)
    # images: only the image target is normalised, the rest of the line
    # keeps its case and its colons
    line = re.sub(r'\[\[Image\(([^)]+)\)\]\]', _image_to_markdown, line)
    # NOTE(review): rewrites an Image macro the pattern above could not
    # convert to '[[Image\'; purpose unclear, kept as is - confirm
    line = re.sub(r'\[\[Image\(', r'[[Image\\', line)
    # translated pages - not supported
    line = re.sub(r'\[\[TranslatedPages\]\]', '', line)
    # raw: a lone '{{{' opens a code block handled by do_raw() from now on
    line, count = re.subn(r'^{{{$', r'```', line)
    if count > 0:
        raw = True
        line = raw_indent + line
    return line
###
def get_from_url(url):
    """Download the wiki source of a Trac page after prompting for credentials.

    The user name and password are asked for interactively and sent with
    HTTP Basic authentication. The page is requested in Trac's plain-text
    format (``format=txt``).

    url -- address of the wiki page (``http://`` or ``https://``).

    Returns a file-like object that yields the page line by line: a text
    stream on Python 3, a byte stream on Python 2 (where ``str`` is bytes).
    Errors raised by ``urlopen`` (e.g. ``HTTPError``) are not caught.
    """
    # redirect prompts to stderr to allow to redirect the converted output
    old_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
        user = input('Trac username: ')  # raw_input() in Python 2
        password = getpass.getpass('Trac Password: ')
    finally:
        # restore stdout even if the prompt is interrupted (Ctrl-C / EOF)
        sys.stdout = old_stdout

    credentials = (user + ':' + password).encode('utf-8')
    headers = {
        'Authorization': 'Basic ' + base64.b64encode(credentials).decode('utf-8'),
    }

    # WARN: SSL certificate verification is deliberately disabled, so the
    # credentials above are sent without authenticating the server.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False  # must be cleared before verify_mode
    ctx.verify_mode = ssl.CERT_NONE

    # do not break URLs that already carry a query string
    separator = '&' if '?' in url else '?'
    req = Request(url=url + separator + 'format=txt', headers=headers)
    response = urlopen(req, context=ctx)
    try:
        data = response.read()
        if sys.version_info[0] >= 3:
            charset = response.headers.get_content_charset() or 'utf-8'
    finally:
        response.close()

    if sys.version_info[0] < 3:
        return io.BytesIO(data)
    # On Python 3 the converter's str regexes cannot process bytes lines,
    # so hand back decoded text.
    return io.StringIO(data.decode(charset))
######################
# Input selection: a URL argument is downloaded from Trac, any other
# argument is opened as a local file, and without arguments stdin is read.
if len(sys.argv) > 1:
    if sys.argv[1].startswith(('http://', 'https://')):
        trac_input = get_from_url(sys.argv[1])
    else:
        trac_input = open(sys.argv[1])
else:
    trac_input = sys.stdin

# Convert line by line; the module-level `raw` flag (maintained by
# do_tracwiki / do_raw) selects the converter for each line.
try:
    for line in trac_input:
        # A byte stream yields bytes lines on Python 3, which the str
        # regexes cannot handle. On Python 2 bytes is str, so this is a no-op.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode('utf-8')
        line = line.rstrip()
        if raw:
            line = do_raw(line)
        else:
            line = do_tracwiki(line)
        print(line)
finally:
    # close what this script opened; stdin is left alone
    if trac_input is not sys.stdin:
        trac_input.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment