pablomon · December 7, 2023 09:45
diff --git a/trac2md.py b/trac2md.py
 #!/usr/bin/env python
 #
 # Trac Wiki to Markdown converter
 #
 # Copyright(c) 2019 Keisuke MORI ([email protected])
 #
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 # as published by the Free Software Foundation; either version 2
 # of the License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 #

 ### for Python 2/3 compatible code in Python 3 style
 #    https://stackoverflow.com/questions/5868506/backwards-compatible-input-calls-in-python
 ##   print
 from __future__ import print_function
 ##   raw_input/input
 if hasattr(__builtins__, 'raw_input'):
    input = raw_input
 ## urllib
 #   'urllib is the hardest module to use from Python 2/3 compatible code.'
 #    - from https://python-future.org/compatible_idioms.html
 try:
    # Python 3
    from urllib.request import urlopen, Request
    from urllib.error import HTTPError
 except ImportError:
    # Python 2.7
    from urllib2 import urlopen, Request, HTTPError

 import sys
 import re

 import ssl
 import getpass
 import base64
 import io

 # Converter state shared by do_raw() and do_tracwiki().
 # raw: True while the lines being read are inside a multi-line {{{ ... }}}
 # block (set by do_tracwiki on '{{{', cleared by do_raw on '}}}').
 raw = False
 # table: True once a line containing '||' has been seen, until the first
 # following line without any '||'.
 table = False
 # raw_indent: prefix put in front of the fence lines and of every line
 # inside a raw block.
 raw_indent = '  '

 # Bullet-list nesting state used by list_adjust_indent().
 # list_indent is a stack holding the source indentation width of every
 # open list level; the -1 at the bottom is a sentinel that is never popped
 # because a real width is always >= 0.  list_level is the zero-based depth
 # of the innermost open level (-1 until the first bullet is seen).
 list_level = -1
 list_indent = [-1]

 def list_adjust_indent(m):
     """Return the Markdown bullet prefix for a matched Trac list marker.

     m is a regex match whose group(1) holds the whitespace in front of the
     bullet.  Its width is compared with the widths already on the
     list_indent stack to decide whether the item closes levels, stays on
     the current level, or opens a deeper one.  The result is the bullet
     indented by 1 + 2 * depth spaces, followed by '* '.
     """
     global list_level, list_indent
     width = len(m.group(1))

     # Dedent: drop every level that was opened at a wider indentation.
     while list_indent[-1] > width:
         list_indent.pop()
         list_level -= 1

     # Indent: anything wider than the current level opens a new level.
     if list_indent[-1] < width:
         list_indent.append(width)
         list_level += 1

     padding = ' ' * (2 * list_level + 1)
     return padding + '* '

 def do_raw(line):
     """Convert one line read while inside a Trac {{{ ... }}} block.

     A line that is exactly '}}}' becomes a closing ``` fence and clears
     the module-level `raw` flag; any other line is passed through
     untouched.  Either way the result is prefixed with `raw_indent`.
     """
     global raw
     converted, closings = re.subn(r'^}}}$', r'```', line)
     if closings:
         # The block is closed: following lines are wiki markup again.
         raw = False
     return raw_indent + converted

 def _table_separator(row):
     """Build the Markdown delimiter row (e.g. '|-|-|') for a table row.

     row is a table line whose cells are already separated by single pipes.
     """
     cells = row.split('|')
     # Text outside the outermost pipes only counts as a column when it is
     # not blank; every cell between two pipes always needs a delimiter,
     # even when the header cell itself is empty.
     first = '-' if cells[0].strip() else ''
     last = '-' if cells[-1].strip() else ''
     return '|'.join([first] + ['-'] * (len(cells) - 2) + [last])

 def _image_to_markdown(m):
     """Render a matched [[Image(target)]] macro as a Markdown image."""
     # Colons in the target (e.g. wiki:Page:pic.png) become path separators
     # and the path is lower-cased.  NOTE(review): presumably this matches
     # how the attachments are laid out on the destination site - confirm.
     target = m.group(1).replace(':', '/').lower()
     return '![%s](/%s)' % (target, target)

 def do_tracwiki(line):
     """Convert one line of Trac wiki markup to Markdown.

     Handles headings, bullet lists, bold/italic, inline code, links,
     section links and anchors, tables, and the BR, PageOutline, Image and
     TranslatedPages macros.  Reads and updates the module-level `table`
     flag, and sets `raw` when the line opens a multi-line {{{ block.

     Returns the converted text.  The first row of a table comes back as
     several lines: an empty line, the row itself and the delimiter row.
     """
     global raw, table

     # section: '= T =' .. '====== T ======' -> '# T' .. '###### T'
     for level in range(1, 7):
         marks = '=' * level
         line = re.sub(r'^' + marks + r'\s+(.*?)(\s+' + marks + r')?$',
                       '#' * level + r' \1', line)

     # list
     line = re.sub(r'^(\s*)\* ', list_adjust_indent, line)
     line = re.sub(r'^(\s*)\- ', list_adjust_indent, line)

     # text: ''' is bold and '' is italic in Trac; bold is handled first so
     # the italic pattern cannot eat part of a ''' delimiter
     line = re.sub(r'\'\'\'(.*?)\'\'\'', r'**\1**', line)
     line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)

     # code
     line = re.sub(r'{{{(.*?)}}}', r'`\1`', line)

     # link
     line = re.sub(r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)

     # section link
     line = re.sub(r'\[(#[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)
     # section anchor
     line = re.sub(r'\s#([^\s\[\]]+)', r' <a id="\1"></a>', line)

     # table: Trac separates cells with '||', Markdown with '|'
     line, pipes = re.subn(r'\|\|', r'|', line)
     if pipes == 0:
         table = False
     elif not table:
         # First row of a table: Markdown needs an empty line before the
         # table and a delimiter row right after the header row.
         line = '\n' + line + '\n' + _table_separator(line)
         table = True

     # macro
     line = re.sub(r'(?i)\[\[BR\]\]', r'<br />', line)
     #  TOC is not supported - use other tools
     line = re.sub(r'\[\[PageOutline.*\]\]', r'<!--- TOC --->\n<!--- /TOC --->', line)

     # images: only the image target is rewritten, so links and ordinary
     # text on the same line keep their colons and their case
     line = re.sub(r'\[\[Image\(([^)]+)\)\]\]', _image_to_markdown, line)
     # NOTE(review): any '[[Image(' the pattern above did not consume gets
     # its '(' replaced by a backslash; the reason is not visible here.
     line = re.sub(r'\[\[Image\(', r'[[Image\\', line)

     # translated pages - not supported
     line = re.sub(r'\[\[TranslatedPages\]\]', '', line)

     # raw: a line that is exactly '{{{' opens a fenced block
     line, count = re.subn(r'^{{{$', r'```', line)
     if count > 0:
         raw = True
         line = raw_indent + line

     return line

 ###

 def get_from_url(url):
     """Download the wiki source of a Trac page and return it as a stream.

     Prompts on stderr for a Trac username and password, requests the page
     in 'format=txt' form with HTTP Basic authentication and returns a
     file-like object that yields the page line by line.  The lines are
     native strings: decoded text on Python 3, byte strings on Python 2.

     Errors raised by urlopen() (e.g. HTTPError) are not caught here.
     """
     # redirect prompts to stderr to allow to redirect the converted output
     old_stdout = sys.stdout
     sys.stdout = sys.stderr
     try:
         user = input('Trac username: ') # raw_input() in Python 2
         password = getpass.getpass('Trac Password: ')
     finally:
         # put stdout back even when a prompt is interrupted (EOF, Ctrl-C)
         sys.stdout = old_stdout

     credentials = base64.b64encode((user + ':' + password).encode('utf-8')).decode('utf-8')
     headers = {'Authorization': 'Basic ' + credentials}

     # WARN: it's to disable SSL Certificate Verification
     ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)

     # keep an existing query string intact when asking for the text format
     separator = '&' if '?' in url else '?'
     req = Request(url=url + separator + 'format=txt', headers=headers)
     response = urlopen(req, context=ctx)
     try:
         data = response.read()
     finally:
         response.close()

     if isinstance(data, str):
         # Python 2: the payload already is a native (byte) string
         return io.BytesIO(data)
     # Python 3: the converters use str patterns, so hand them text.
     # NOTE(review): assumes the export is UTF-8 encoded - confirm.
     return io.StringIO(data.decode('utf-8'))


 ######################

 # Select the input: a Trac page URL (fetched with get_from_url), a local
 # file path, or stdin when no argument is given.
 if len(sys.argv) > 1:
     if sys.argv[1].startswith(('http://', 'https://')):
         trac_input = get_from_url(sys.argv[1])
     else:
         trac_input = open(sys.argv[1])
 else:
     trac_input = sys.stdin


 # Convert line by line.  The module-level `raw` flag, toggled by the two
 # converters, tells whether the current line is inside a {{{ ... }}} block.
 try:
     for line in trac_input:
         line = line.rstrip()
         if raw:
             line = do_raw(line)
         else:
             line = do_tracwiki(line)

         print(line)
 finally:
     # Close what this script opened; stdin is left to the interpreter.
     if trac_input is not sys.stdin:
         trac_input.close()
	#!/usr/bin/env python
	#
	# Trac Wiki to Markdown converter
	#
	# Copyright(c) 2019 Keisuke MORI ([email protected])
	#
	# This program is free software; you can redistribute it and/or
	# modify it under the terms of the GNU General Public License
	# as published by the Free Software Foundation; either version 2
	# of the License, or (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program; if not, write to the Free Software
	# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
	#

	### for Python 2/3 compatible code in Python 3 style
	# https://stackoverflow.com/questions/5868506/backwards-compatible-input-calls-in-python
	## print
	from __future__ import print_function
	## raw_input/input
	# Python 2 only: make input() behave like the Python 3 builtin.
	if hasattr(__builtins__, 'raw_input'):
	    input = raw_input
	## urllib
	# 'urllib is the hardest module to use from Python 2/3 compatible code.'
	# - from https://python-future.org/compatible_idioms.html
	try:
	    # Python 3
	    from urllib.request import urlopen, Request
	    from urllib.error import HTTPError
	except ImportError:
	    # Python 2.7
	    from urllib2 import urlopen, Request, HTTPError

	import sys
	import re

	import ssl
	import getpass
	import base64
	import io

	# Converter state shared by do_raw() and do_tracwiki().
	# raw: True while the lines being read are inside a multi-line {{{ ... }}}
	# block (set by do_tracwiki on '{{{', cleared by do_raw on '}}}').
	raw = False
	# table: True once a line containing '||' has been seen, until the first
	# following line without any '||'.
	table = False
	# raw_indent: two-space prefix put in front of the fence lines and of
	# every line inside a raw block.
	raw_indent = '  '

	# Bullet-list nesting state used by list_adjust_indent().
	# list_indent is a stack holding the source indentation width of every
	# open list level; the -1 at the bottom is a sentinel that is never popped
	# because a real width is always >= 0.  list_level is the zero-based depth
	# of the innermost open level (-1 until the first bullet is seen).
	list_level = -1
	list_indent = [-1]

	def list_adjust_indent(m):
	    """Return the Markdown bullet prefix for a matched Trac list marker.

	    m is a regex match whose group(1) holds the whitespace in front of the
	    bullet.  Its width is compared with the widths already on the
	    list_indent stack to decide whether the item closes levels, stays on
	    the current level, or opens a deeper one.  The result is the bullet
	    indented by 1 + 2 * depth spaces, followed by '* '.
	    """
	    global list_level, list_indent
	    num_spaces = len(m.group(1))

	    # Dedent: drop every level that was opened at a wider indentation.
	    while num_spaces < list_indent[-1]:
	        list_level -= 1
	        list_indent = list_indent[:-1]
	    # Indent: anything wider than the current level opens a new level.
	    if num_spaces > list_indent[-1]:
	        list_level += 1
	        list_indent.append(num_spaces)

	    return (' ' * (1 + list_level * 2)) + '* '

	def do_raw(line):
	    """Convert one line read while inside a Trac {{{ ... }}} block.

	    A line that is exactly '}}}' becomes a closing ``` fence and clears
	    the module-level `raw` flag; any other line is passed through
	    untouched.  Either way the result is prefixed with `raw_indent`.
	    """
	    global raw
	    line, count = re.subn(r'^}}}$', r'```', line)
	    if count > 0:
	        # The block is closed: following lines are wiki markup again.
	        raw = False
	    line = raw_indent + line
	    return line

	def _table_separator(row):
	    """Build the Markdown delimiter row (e.g. '|-|-|') for a table row.

	    row is a table line whose cells are already separated by single pipes.
	    """
	    cells = row.split('|')
	    # Text outside the outermost pipes only counts as a column when it is
	    # not blank; every cell between two pipes always needs a delimiter,
	    # even when the header cell itself is empty.
	    first = '-' if cells[0].strip() else ''
	    last = '-' if cells[-1].strip() else ''
	    return '|'.join([first] + ['-'] * (len(cells) - 2) + [last])

	def _image_to_markdown(m):
	    """Render a matched [[Image(target)]] macro as a Markdown image."""
	    # Colons in the target (e.g. wiki:Page:pic.png) become path separators
	    # and the path is lower-cased.  NOTE(review): presumably this matches
	    # how the attachments are laid out on the destination site - confirm.
	    target = m.group(1).replace(':', '/').lower()
	    return '![%s](/%s)' % (target, target)

	def do_tracwiki(line):
	    """Convert one line of Trac wiki markup to Markdown.

	    Handles headings, bullet lists, bold/italic, inline code, links,
	    section links and anchors, tables, and the BR, PageOutline, Image and
	    TranslatedPages macros.  Reads and updates the module-level `table`
	    flag, and sets `raw` when the line opens a multi-line {{{ block.

	    Returns the converted text.  The first row of a table comes back as
	    several lines: an empty line, the row itself and the delimiter row.
	    """
	    global raw, table

	    # section: '= T =' .. '====== T ======' -> '# T' .. '###### T'
	    for level in range(1, 7):
	        marks = '=' * level
	        line = re.sub(r'^' + marks + r'\s+(.*?)(\s+' + marks + r')?$',
	                      '#' * level + r' \1', line)

	    # list
	    line = re.sub(r'^(\s*)\* ', list_adjust_indent, line)
	    line = re.sub(r'^(\s*)\- ', list_adjust_indent, line)

	    # text: ''' is bold and '' is italic in Trac; bold is handled first so
	    # the italic pattern cannot eat part of a ''' delimiter
	    line = re.sub(r'\'\'\'(.*?)\'\'\'', r'**\1**', line)
	    line = re.sub(r'\'\'(.*?)\'\'', r'_\1_', line)

	    # code
	    line = re.sub(r'{{{(.*?)}}}', r'`\1`', line)

	    # link
	    line = re.sub(r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)

	    # section link
	    line = re.sub(r'\[(#[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line)
	    # section anchor
	    line = re.sub(r'\s#([^\s\[\]]+)', r' <a id="\1"></a>', line)

	    # table: Trac separates cells with '||', Markdown with '|'
	    line, pipes = re.subn(r'\|\|', r'|', line)
	    if pipes == 0:
	        table = False
	    elif not table:
	        # First row of a table: Markdown needs an empty line before the
	        # table and a delimiter row right after the header row.
	        line = '\n' + line + '\n' + _table_separator(line)
	        table = True

	    # macro
	    line = re.sub(r'(?i)\[\[BR\]\]', r'<br />', line)
	    # TOC is not supported - use other tools
	    line = re.sub(r'\[\[PageOutline.*\]\]', r'<!--- TOC --->\n<!--- /TOC --->', line)

	    # images: only the image target is rewritten, so links and ordinary
	    # text on the same line keep their colons and their case
	    line = re.sub(r'\[\[Image\(([^)]+)\)\]\]', _image_to_markdown, line)
	    # NOTE(review): any '[[Image(' the pattern above did not consume gets
	    # its '(' replaced by a backslash; the reason is not visible here.
	    line = re.sub(r'\[\[Image\(', r'[[Image\\', line)

	    # translated pages - not supported
	    line = re.sub(r'\[\[TranslatedPages\]\]', '', line)

	    # raw: a line that is exactly '{{{' opens a fenced block
	    line, count = re.subn(r'^{{{$', r'```', line)
	    if count > 0:
	        raw = True
	        line = raw_indent + line

	    return line

	###

	def get_from_url(url):
	    """Download the wiki source of a Trac page and return it as a stream.

	    Prompts on stderr for a Trac username and password, requests the page
	    in 'format=txt' form with HTTP Basic authentication and returns a
	    file-like object that yields the page line by line.  The lines are
	    native strings: decoded text on Python 3, byte strings on Python 2.

	    Errors raised by urlopen() (e.g. HTTPError) are not caught here.
	    """
	    # redirect prompts to stderr to allow to redirect the converted output
	    old_stdout = sys.stdout
	    sys.stdout = sys.stderr
	    try:
	        user = input('Trac username: ') # raw_input() in Python 2
	        password = getpass.getpass('Trac Password: ')
	    finally:
	        # put stdout back even when a prompt is interrupted (EOF, Ctrl-C)
	        sys.stdout = old_stdout

	    credentials = base64.b64encode((user + ':' + password).encode('utf-8')).decode('utf-8')
	    headers = {'Authorization': 'Basic ' + credentials}

	    # WARN: it's to disable SSL Certificate Verification
	    ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)

	    # keep an existing query string intact when asking for the text format
	    separator = '&' if '?' in url else '?'
	    req = Request(url=url + separator + 'format=txt', headers=headers)
	    response = urlopen(req, context=ctx)
	    try:
	        data = response.read()
	    finally:
	        response.close()

	    if isinstance(data, str):
	        # Python 2: the payload already is a native (byte) string
	        return io.BytesIO(data)
	    # Python 3: the converters use str patterns, so hand them text.
	    # NOTE(review): assumes the export is UTF-8 encoded - confirm.
	    return io.StringIO(data.decode('utf-8'))


	######################

	# Select the input: a Trac page URL (fetched with get_from_url), a local
	# file path, or stdin when no argument is given.
	if len(sys.argv) > 1:
	    if sys.argv[1].startswith(('http://', 'https://')):
	        trac_input = get_from_url(sys.argv[1])
	    else:
	        trac_input = open(sys.argv[1])
	else:
	    trac_input = sys.stdin


	# Convert line by line.  The module-level `raw` flag, toggled by the two
	# converters, tells whether the current line is inside a {{{ ... }}} block.
	try:
	    for line in trac_input:
	        line = line.rstrip()
	        if raw:
	            line = do_raw(line)
	        else:
	            line = do_tracwiki(line)

	        print(line)
	finally:
	    # Close what this script opened; stdin is left to the interpreter.
	    if trac_input is not sys.stdin:
	        trac_input.close()