roycoding · November 20, 2022 17:20
diff --git a/Readme.md b/Readme.md
diff --git a/zg_convert.py b/zg_convert.py
 # Convert Markua 0.10 to LaTeX for Zefs Guides

 # Things this script handles
 # - Remove sample related stuff
 # - Convert chapter, section, subsection, etc
 # - Footnotes
 # - Links
 # - Links in footnotes
 # - Lists
 # - Images
 # - Asides
 # - Bold
 # - Italics
 # - Quotation marks
 # - LaTeX math
 # - Quoted text
 # - Other characters like $ and &
 # - Lists

 import re

 import click


 def sections(nfile: str) -> str:
     """Rewrite Markua ``#`` headings as ZG LaTeX sectioning commands.

     Lines starting with one to four ``#`` characters and a space become the
     chapter, sect, ssect and sssect commands respectively, with the heading
     text as the argument.  Lines with five or more ``#`` are left untouched.
     """
     # (heading pattern, replacement template), one pair per heading depth.
     heading_rules = (
         (r"^#{1} (.+)", r"\\chapter{\g<1>}"),
         (r"^#{2} (.+)", r"\\sect{\g<1>}"),
         (r"^#{3} (.+)", r"\\ssect{\g<1>}"),
         (r"^#{4} (.+)", r"\\sssect{\g<1>}"),
     )

     converted = nfile
     for pattern, template in heading_rules:
         converted = re.sub(pattern, template, converted, flags=re.M)
     return converted


 def footnotes(nfile: str) -> str:
     """Turn Markua footnotes into inline LaTeX footnote commands.

     For every marker ``[^name]`` that has a definition line of the form
     ``[^name]: text``, the definition text is removed and the first
     remaining ``[^name]`` is replaced by a footnote command holding the
     text.  Markers without a definition are left as they are.

     As of now this should be run BEFORE the ``links`` function, and it
     assumes the footnote text is correctly formatted (one line per note).

     Returns the converted text.  Whenever at least one footnote was
     converted, leading and trailing whitespace of the text is stripped.
     """
     # De-duplicate markers but keep first-seen order so the result does not
     # depend on set iteration order.
     markers = dict.fromkeys(re.findall(r"\[\^(.+?)\]", nfile))

     # Look up every definition in the unmodified text first.
     found = []
     for marker in markers:
         definition = re.search(
             rf"^\[\^{re.escape(marker)}\]: (.+)$", nfile, flags=re.M
         )
         if definition is None:
             # Reference without a definition: nothing to convert.
             continue
         found.append((marker, definition.group(1)))

     for marker, note in found:
         # Remove the definition first, so the single substitution below
         # always lands on a reference even if the definition came first.
         nfile = nfile.replace(f"[^{marker}]: {note}", "")
         nfile = re.sub(
             rf"\[\^{re.escape(marker)}\]",
             # A callable keeps backslashes in the note text literal.
             lambda _match, note=note: rf"\footnote{{{note}}}",
             nfile,
             count=1,
         ).strip()

     return nfile


 def links(nfile: str) -> str:
     """Convert Markdown links ``[text](url)`` to LaTeX link commands.

     A link whose text equals its URL becomes a url command; any other link
     becomes the custom footurl command with the text and the URL as its two
     arguments.  Image links (preceded by ``!``) and footnote markers (text
     starting with ``^``) are not touched.

     As of now this should be run AFTER the ``footnotes`` function.
     """
     # Text: one or more characters without "]" or newline, not starting
     # with "^".  URL: shortest run of characters up to the next ")".
     link_pattern = re.compile(r"(?<!!)\[([^\^\]\n][^\]\n]*)\]\((.+?)\)")

     def _to_latex(match):
         # Returned strings are inserted literally, so regex metacharacters
         # or backslashes in the text or URL need no escaping.
         text, url = match.groups()
         if text == url:
             return rf"\url{{{url}}}"
         return rf"\footurl{{{text}}}{{{url}}}"

     return link_pattern.sub(_to_latex, nfile)


 def images(nfile: str) -> str:
     """Convert Markdown images ``![caption](path)`` to LaTeX figures.

     Each image becomes a figure environment with an unnumbered caption and
     the path prefixed with ``../``.  Paths ending in ``jpg``/``jpeg`` (any
     case) use includegraphics at 0.3 of the text width; every other path
     uses includesvg at full text width.

     When the text contains at least one image, all ``{alt: ...}`` attribute
     blocks are removed as well.  Text without images is returned unchanged.
     """
     # Caption: no "]" or newline, not starting with "^".  Path: no ")".
     image_pattern = re.compile(r"!\[([^\^\]\n][^\]\n]*)\]\(([^)\n]+)\)")
     if image_pattern.search(nfile) is None:
         return nfile

     def _to_figure(match):
         caption, path = match.groups()
         if path.lower().endswith(("jpg", "jpeg")):
             include, width = "graphics", 0.3
         else:
             include, width = "svg", 1.0
         # Built line by line so the output does not depend on how this
         # source file itself is indented.
         figure_lines = (
             r"\begin{figure}[htb]",
             r"    \centering",
             rf"    \include{include}[width={width}\textwidth]{{../{path}}}",
             rf"    \caption*{{{caption}}}",
             r"\end{figure}",
             "",
         )
         return "\n".join(figure_lines)

     # Drop the alt/width attribute blocks once, up front.
     without_alt = re.sub(r"\{alt:.+?\}", "", nfile)
     return image_pattern.sub(_to_figure, without_alt)


 def asides(nfile: str) -> str:
     """Swap Markua aside tags for the begin/end commands of an aside environment."""
     # Opening tags are handled before closing tags.
     opened = nfile.replace("{aside}", r"\begin{aside}")
     closed = opened.replace("{/aside}", r"\end{aside}")
     return closed


 def metatags(nfile: str) -> str:
     """Strip Leanpub meta tags from the text.

     Removes ``{sample: ...}`` attributes and the literal ``{frontmatter}``
     and ``{mainmatter}`` tags; the surrounding newlines are kept.
     """
     # The sample pattern is greedy: it runs to the last "}" on the line.
     cleaned = re.compile(r"\{sample: .+\}").sub("", nfile)
     for tag in (r"{frontmatter}", r"{mainmatter}"):
         cleaned = cleaned.replace(tag, "")
     return cleaned


 def styles(nfile: str) -> str:
     """Convert Markdown emphasis to LaTeX.

     ``*text*`` becomes an emph command and ``**text**`` a textbf command.
     Italics are substituted first, then bold.
     """
     italic = re.compile(r"(?<!\*)\*{1}([^*]+?)\*{1}", flags=re.M)
     bold = re.compile(r"(?<!\*)\*{2}([^*]+?)\*{2}", flags=re.M)

     with_italics = italic.sub(r"\\emph{\g<1>}", nfile)
     return bold.sub(r"\\textbf{\g<1>}", with_italics)


 def quote_marks(nfile: str) -> str:
     """Replace straight double quotes around text with LaTeX quote marks.

     Each ``"text"`` pair on a line becomes two backticks, the text, and two
     apostrophes.  As of now this must be run AFTER image conversion.
     """
     double_quoted = re.compile(r"\"(.+?)\"")
     return double_quoted.sub(r"``\g<1>''", nfile)


 def math(nfile: str) -> str:
     """Convert Markua math markers to LaTeX math delimiters.

     Inline math written as a backtick-quoted span followed by ``$`` becomes
     ``$...$``.  A fenced block opened by three backticks plus ``$`` and
     closed by three backticks becomes a display-math block.
     """
     inline = re.compile(r"`{1}([^`]+?)`{1}\$")
     display = re.compile(r"^`{3}\$\n((.+\n)+)`{3}", flags=re.M)

     # Inline spans first, then the fenced stand-alone blocks.
     result = inline.sub(r"$\g<1>$", nfile)
     result = display.sub(r"\\[\n\g<1>\\]", result)
     return result


 def blockquotes(nfile: str) -> str:
     """Convert Markua block quotes to LaTeX quote environments.

     Two forms are handled:

     * Lines starting with ``>``.  A run of consecutive ``>`` lines becomes
       one quote environment with each line indented by four spaces.  The
       text after ``>`` is kept as is, including a leading space.
     * ``{blockquote}`` / ``{/blockquote}`` tags, replaced by the begin
       command and by a newline plus the end command respectively.
     """

     def _wrap(match):
         # Drop the leading ">" of every line in the run.
         quoted = "".join(
             f"    {line[1:]}\n" for line in match.group(0).split("\n")
         )
         return "\\begin{quote}\n" + quoted + "\\end{quote}"

     # Anchoring on line starts (instead of consuming the surrounding
     # newlines) lets adjacent ">" lines and quotes at the very start or end
     # of the text match.
     converted = re.sub(r"^>.+(?:\n>.+)*", _wrap, nfile, flags=re.M)

     converted = converted.replace("{blockquote}", r"\begin{quote}")
     return converted.replace("{/blockquote}", "\n\\end{quote}")


 def special_chars(nfile: str) -> str:
     """Backslash-escape characters that are special in LaTeX.

     Rules are applied in this order:

     * ``$`` when followed by two or more digits, or by a digit and a comma
       (a hack to leave inline math followed by a single digit alone);
     * ``%`` directly after digits, unless followed by ``}``;
     * ``&`` unless it is the first character of the text or is preceded by
       two whitespace characters (meant to spare multi-line LaTeX math);
     * every ``#``.
     """
     escape_rules = (
         (r"\$(?=\d{2,}|\d,)", r"\\$"),
         (r"(\d+)%(?!\})", r"\g<1>\%"),
         (r"(?<!^)(?<!\s{2})&", r"\\&"),
         (r"#", r"\\#"),
     )

     escaped = nfile
     for pattern, replacement in escape_rules:
         escaped = re.sub(pattern, replacement, escaped)
     return escaped


 def lists(nfile: str) -> str:
     """Convert Markdown bullet and numbered lists to LaTeX list environments.

     A list starts at a line beginning with ``- `` (itemize) or with digits,
     a dot and a space (enumerate) and extends over every directly following
     non-empty line; each line becomes one item.  Bullet lists are converted
     first, then numbered lists.

     Only the marker at the start of a line is removed, so text such as
     ``a - b`` or ``2. `` inside an item is preserved.  A list that ends the
     text without a trailing newline is converted as well.

     This seems to work better early in the order of conversion steps.
     """
     bullet_marker = re.compile(r"^- ")
     number_marker = re.compile(r"^\d+\. ")

     def _render(block, environment, marker):
         # One item per line, with the leading list marker (if any) removed.
         items = "".join(
             "    \\item{" + marker.sub("", line) + "}\n"
             for line in block.strip().split("\n")
         )
         return (
             "\\begin{" + environment + "}[noitemsep]\n"
             + items
             + "\\end{" + environment + "}\n"
         )

     # Each list line ends with a newline or with the end of the text.
     unordered = re.compile(r"^- (?:.+(?:\n|\Z))+", flags=re.M)
     ordered = re.compile(r"^\d+\. (?:.+(?:\n|\Z))+", flags=re.M)

     converted = unordered.sub(
         lambda match: _render(match.group(0), "itemize", bullet_marker), nfile
     )
     converted = ordered.sub(
         lambda match: _render(match.group(0), "enumerate", number_marker),
         converted,
     )
     return converted


 @click.command()
 @click.argument("in_file", type=click.File("r"))
 @click.argument("out_file", type=click.File("w"))
 def convert(in_file, out_file) -> None:
     """Convert the Markua text in IN_FILE to LaTeX and write it to OUT_FILE."""
     # Both arguments are declared with click.File, so they are used here as
     # file objects (read from / written to), not as path strings.

     # The order of the steps matters:
     # - lists is noted to work better early in the sequence;
     # - footnotes must run before links;
     # - quote_marks must run after images;
     # - special_chars escapes every "#", so it runs after sections has
     #   consumed the heading markers.
     pipeline = (
         blockquotes,
         lists,
         metatags,
         footnotes,
         styles,
         links,
         images,
         asides,
         quote_marks,
         math,
         sections,
         special_chars,
     )

     nfile = in_file.read()
     for step in pipeline:
         nfile = step(nfile)

     out_file.write(nfile)


 # Run the click command only when this file is executed as a script.
 if __name__ == "__main__":
    convert()
	# Convert Markua 0.10 to Latex for Zefs Guides

	# Things this script handles
	# - Remove sample related stuff
	# - Convert chapter, section, subsection, etc
	# - Footnotes
	# - Links
	# - Links in footnotes
	# - Lists
	# - Images
	# - Asides
	# - Bold
	# - Italics
	# - Quotation marks
	# - LaTeX math
	# - Quoted text
	# - Other characters like $ and &
	# - Lists

	import re

	import click


	def sections(nfile: str) -> str:
	    """Rewrite Markua ``#`` headings as ZG LaTeX sectioning commands.

	    Lines starting with one to four ``#`` characters and a space become the
	    chapter, sect, ssect and sssect commands respectively, with the heading
	    text as the argument.  Lines with five or more ``#`` are left untouched.
	    """
	    # (heading pattern, replacement template), one pair per heading depth.
	    heading_rules = (
	        (r"^#{1} (.+)", r"\\chapter{\g<1>}"),
	        (r"^#{2} (.+)", r"\\sect{\g<1>}"),
	        (r"^#{3} (.+)", r"\\ssect{\g<1>}"),
	        (r"^#{4} (.+)", r"\\sssect{\g<1>}"),
	    )

	    converted = nfile
	    for pattern, template in heading_rules:
	        converted = re.sub(pattern, template, converted, flags=re.M)
	    return converted


	def footnotes(nfile: str) -> str:
	    """Turn Markua footnotes into inline LaTeX footnote commands.

	    For every marker ``[^name]`` that has a definition line of the form
	    ``[^name]: text``, the definition text is removed and the first
	    remaining ``[^name]`` is replaced by a footnote command holding the
	    text.  Markers without a definition are left as they are.

	    As of now this should be run BEFORE the ``links`` function, and it
	    assumes the footnote text is correctly formatted (one line per note).

	    Returns the converted text.  Whenever at least one footnote was
	    converted, leading and trailing whitespace of the text is stripped.
	    """
	    # De-duplicate markers but keep first-seen order so the result does not
	    # depend on set iteration order.
	    markers = dict.fromkeys(re.findall(r"\[\^(.+?)\]", nfile))

	    # Look up every definition in the unmodified text first.
	    found = []
	    for marker in markers:
	        definition = re.search(
	            rf"^\[\^{re.escape(marker)}\]: (.+)$", nfile, flags=re.M
	        )
	        if definition is None:
	            # Reference without a definition: nothing to convert.
	            continue
	        found.append((marker, definition.group(1)))

	    for marker, note in found:
	        # Remove the definition first, so the single substitution below
	        # always lands on a reference even if the definition came first.
	        nfile = nfile.replace(f"[^{marker}]: {note}", "")
	        nfile = re.sub(
	            rf"\[\^{re.escape(marker)}\]",
	            # A callable keeps backslashes in the note text literal.
	            lambda _match, note=note: rf"\footnote{{{note}}}",
	            nfile,
	            count=1,
	        ).strip()

	    return nfile


	def links(nfile: str) -> str:
	    """Convert Markdown links ``[text](url)`` to LaTeX link commands.

	    A link whose text equals its URL becomes a url command; any other link
	    becomes the custom footurl command with the text and the URL as its two
	    arguments.  Image links (preceded by ``!``) and footnote markers (text
	    starting with ``^``) are not touched.

	    As of now this should be run AFTER the ``footnotes`` function.
	    """
	    # Text: one or more characters without "]" or newline, not starting
	    # with "^".  URL: shortest run of characters up to the next ")".
	    link_pattern = re.compile(r"(?<!!)\[([^\^\]\n][^\]\n]*)\]\((.+?)\)")

	    def _to_latex(match):
	        # Returned strings are inserted literally, so regex metacharacters
	        # or backslashes in the text or URL need no escaping.
	        text, url = match.groups()
	        if text == url:
	            return rf"\url{{{url}}}"
	        return rf"\footurl{{{text}}}{{{url}}}"

	    return link_pattern.sub(_to_latex, nfile)


	def images(nfile: str) -> str:
	    """Convert Markdown images ``![caption](path)`` to LaTeX figures.

	    Each image becomes a figure environment with an unnumbered caption and
	    the path prefixed with ``../``.  Paths ending in ``jpg``/``jpeg`` (any
	    case) use includegraphics at 0.3 of the text width; every other path
	    uses includesvg at full text width.

	    When the text contains at least one image, all ``{alt: ...}`` attribute
	    blocks are removed as well.  Text without images is returned unchanged.
	    """
	    # Caption: no "]" or newline, not starting with "^".  Path: no ")".
	    image_pattern = re.compile(r"!\[([^\^\]\n][^\]\n]*)\]\(([^)\n]+)\)")
	    if image_pattern.search(nfile) is None:
	        return nfile

	    def _to_figure(match):
	        caption, path = match.groups()
	        if path.lower().endswith(("jpg", "jpeg")):
	            include, width = "graphics", 0.3
	        else:
	            include, width = "svg", 1.0
	        # Built line by line so the output does not depend on how this
	        # source file itself is indented.
	        figure_lines = (
	            r"\begin{figure}[htb]",
	            r"    \centering",
	            rf"    \include{include}[width={width}\textwidth]{{../{path}}}",
	            rf"    \caption*{{{caption}}}",
	            r"\end{figure}",
	            "",
	        )
	        return "\n".join(figure_lines)

	    # Drop the alt/width attribute blocks once, up front.
	    without_alt = re.sub(r"\{alt:.+?\}", "", nfile)
	    return image_pattern.sub(_to_figure, without_alt)


	def asides(nfile: str) -> str:
	    """Swap Markua aside tags for the begin/end commands of an aside environment."""
	    # Opening tags are handled before closing tags.
	    opened = nfile.replace("{aside}", r"\begin{aside}")
	    closed = opened.replace("{/aside}", r"\end{aside}")
	    return closed


	def metatags(nfile: str) -> str:
	    """Strip Leanpub meta tags from the text.

	    Removes ``{sample: ...}`` attributes and the literal ``{frontmatter}``
	    and ``{mainmatter}`` tags; the surrounding newlines are kept.
	    """
	    # The sample pattern is greedy: it runs to the last "}" on the line.
	    cleaned = re.compile(r"\{sample: .+\}").sub("", nfile)
	    for tag in (r"{frontmatter}", r"{mainmatter}"):
	        cleaned = cleaned.replace(tag, "")
	    return cleaned


	def styles(nfile: str) -> str:
	    """Convert Markdown emphasis to LaTeX.

	    ``*text*`` becomes an emph command and ``**text**`` a textbf command.
	    Italics are substituted first, then bold.
	    """
	    # The asterisks must be escaped: "*" is a regex quantifier.
	    italic = re.compile(r"(?<!\*)\*{1}([^*]+?)\*{1}", flags=re.M)
	    bold = re.compile(r"(?<!\*)\*{2}([^*]+?)\*{2}", flags=re.M)

	    with_italics = italic.sub(r"\\emph{\g<1>}", nfile)
	    return bold.sub(r"\\textbf{\g<1>}", with_italics)


	def quote_marks(nfile: str) -> str:
	    """Replace straight double quotes around text with LaTeX quote marks.

	    Each ``"text"`` pair on a line becomes two backticks, the text, and two
	    apostrophes.  As of now this must be run AFTER image conversion.
	    """
	    double_quoted = re.compile(r"\"(.+?)\"")
	    return double_quoted.sub(r"``\g<1>''", nfile)


	def math(nfile: str) -> str:
	    """Convert Markua math markers to LaTeX math delimiters.

	    Inline math written as a backtick-quoted span followed by ``$`` becomes
	    ``$...$``.  A fenced block opened by three backticks plus ``$`` and
	    closed by three backticks becomes a display-math block.
	    """
	    inline = re.compile(r"`{1}([^`]+?)`{1}\$")
	    display = re.compile(r"^`{3}\$\n((.+\n)+)`{3}", flags=re.M)

	    # Inline spans first, then the fenced stand-alone blocks.
	    result = inline.sub(r"$\g<1>$", nfile)
	    result = display.sub(r"\\[\n\g<1>\\]", result)
	    return result


	def blockquotes(nfile: str) -> str:
	    """Convert Markua block quotes to LaTeX quote environments.

	    Two forms are handled:

	    * Lines starting with ``>``.  A run of consecutive ``>`` lines becomes
	      one quote environment with each line indented by four spaces.  The
	      text after ``>`` is kept as is, including a leading space.
	    * ``{blockquote}`` / ``{/blockquote}`` tags, replaced by the begin
	      command and by a newline plus the end command respectively.
	    """

	    def _wrap(match):
	        # Drop the leading ">" of every line in the run.
	        quoted = "".join(
	            f"    {line[1:]}\n" for line in match.group(0).split("\n")
	        )
	        return "\\begin{quote}\n" + quoted + "\\end{quote}"

	    # Anchoring on line starts (instead of consuming the surrounding
	    # newlines) lets adjacent ">" lines and quotes at the very start or end
	    # of the text match.
	    converted = re.sub(r"^>.+(?:\n>.+)*", _wrap, nfile, flags=re.M)

	    converted = converted.replace("{blockquote}", r"\begin{quote}")
	    return converted.replace("{/blockquote}", "\n\\end{quote}")


	def special_chars(nfile: str) -> str:
	    """Backslash-escape characters that are special in LaTeX.

	    Rules are applied in this order:

	    * ``$`` when followed by two or more digits, or by a digit and a comma
	      (a hack to leave inline math followed by a single digit alone);
	    * ``%`` directly after digits, unless followed by ``}``;
	    * ``&`` unless it is the first character of the text or is preceded by
	      two whitespace characters (meant to spare multi-line LaTeX math);
	    * every ``#``.
	    """
	    escape_rules = (
	        # "|" is a real alternation here: either lookahead form qualifies.
	        (r"\$(?=\d{2,}|\d,)", r"\\$"),
	        (r"(\d+)%(?!\})", r"\g<1>\%"),
	        (r"(?<!^)(?<!\s{2})&", r"\\&"),
	        (r"#", r"\\#"),
	    )

	    escaped = nfile
	    for pattern, replacement in escape_rules:
	        escaped = re.sub(pattern, replacement, escaped)
	    return escaped


	def lists(nfile: str) -> str:
	    """Convert Markdown bullet and numbered lists to LaTeX list environments.

	    A list starts at a line beginning with ``- `` (itemize) or with digits,
	    a dot and a space (enumerate) and extends over every directly following
	    non-empty line; each line becomes one item.  Bullet lists are converted
	    first, then numbered lists.

	    Only the marker at the start of a line is removed, so text such as
	    ``a - b`` or ``2. `` inside an item is preserved.  A list that ends the
	    text without a trailing newline is converted as well.

	    This seems to work better early in the order of conversion steps.
	    """
	    bullet_marker = re.compile(r"^- ")
	    number_marker = re.compile(r"^\d+\. ")

	    def _render(block, environment, marker):
	        # One item per line, with the leading list marker (if any) removed.
	        items = "".join(
	            "    \\item{" + marker.sub("", line) + "}\n"
	            for line in block.strip().split("\n")
	        )
	        return (
	            "\\begin{" + environment + "}[noitemsep]\n"
	            + items
	            + "\\end{" + environment + "}\n"
	        )

	    # Each list line ends with a newline or with the end of the text.
	    unordered = re.compile(r"^- (?:.+(?:\n|\Z))+", flags=re.M)
	    ordered = re.compile(r"^\d+\. (?:.+(?:\n|\Z))+", flags=re.M)

	    converted = unordered.sub(
	        lambda match: _render(match.group(0), "itemize", bullet_marker), nfile
	    )
	    converted = ordered.sub(
	        lambda match: _render(match.group(0), "enumerate", number_marker),
	        converted,
	    )
	    return converted


	@click.command()
	@click.argument("in_file", type=click.File("r"))
	@click.argument("out_file", type=click.File("w"))
	def convert(in_file, out_file) -> None:
	    """Convert the Markua text in IN_FILE to LaTeX and write it to OUT_FILE."""
	    # Both arguments are declared with click.File, so they are used here as
	    # file objects (read from / written to), not as path strings.

	    # The order of the steps matters:
	    # - lists is noted to work better early in the sequence;
	    # - footnotes must run before links;
	    # - quote_marks must run after images;
	    # - special_chars escapes every "#", so it runs after sections has
	    #   consumed the heading markers.
	    pipeline = (
	        blockquotes,
	        lists,
	        metatags,
	        footnotes,
	        styles,
	        links,
	        images,
	        asides,
	        quote_marks,
	        math,
	        sections,
	        special_chars,
	    )

	    nfile = in_file.read()
	    for step in pipeline:
	        nfile = step(nfile)

	    out_file.write(nfile)


	# Run the click command only when this file is executed as a script.
	if __name__ == "__main__":
	    convert()