Skip to content

Instantly share code, notes, and snippets.

@q3cpma
Last active October 25, 2023 18:36
Show Gist options
  • Save q3cpma/2e0996839e9bed6ba60661dd7e86b451 to your computer and use it in GitHub Desktop.
Save q3cpma/2e0996839e9bed6ba60661dd7e86b451 to your computer and use it in GitHub Desktop.

A way to convert markup from a certain imageboard into HTML. Contains two version: a pure Tcl one and another in Common Lisp using a few packages.

Example usage:

$ cat sample.txt
>green
<pink
==heading==
**spoiler** ''italic'' '''bold''' __underlined__ ~~strikethrough~~
Normal
$ ./gtext.tcl <sample.txt >sample.html
$ cat sample.html
<!DOCTYPE html>
<html>
  <head>
    <title>Title</title>
    <link rel='stylesheet' href='gtext.css'>
  </head>
  <body>
    <pre>
<span class="green">&gt;green</span>
<span class="pink">&lt;pink</span>
<span class="heading">heading</span>
<span class="spoiler">spoiler</span> <i>italic</i> <b>bold</b> <u>underlined</u> <s>strikethrough</s>
Normal
    </pre>
  </body>
</html>
body {
background-color: #d6daf0;
text-align: center;
}
pre {
font-family: Sans-serif;
font-size: 12pt;
color: black;
width: 80ch;
white-space: pre-wrap;
overflow-wrap: break-word;
align: center;
display: inline-block;
text-align: left;
}
.green {
color: #829f38;
}
.pink {
color: #e0727f;
}
.heading {
color: #af0a0f;
font-size: 13pt;
font-weight: bold;
}
.spoiler {
background: black;
color: black;
padding: 0px 1px;
}
.spoiler:hover {
color: white;
}
(load "~/.quicklisp/setup.lisp")
(ql:quickload '("alexandria" "cl-ppcre" "iterate" "spinneret") :silent t)
(use-package :alexandria)
(use-package :iterate)
(require :uiop)
(define-constant +markup-to-html+
'(("^(<.*)$" . (:span :class "pink"))
("^(>.*)$" . (:span :class "green"))
("^==(.*)==$" . (:span :class "heading"))
("\\*{2}(.*?)\\*{2}" . (:span :class "spoiler"))
("(?<!')'{2}([^']*?)'{2}" . (:i)) ;; band-aid solution to differentiate bold and italic regexps
("'{3}(.*?)'{3}" . (:b))
("_{2}(.*?)_{2}" . (:u))
("~{2}(.*?)~{2}" . (:s)))
:test #'equal)
(defstruct (markup-region (:conc-name ""))
(start 0 :type fixnum)
(end 0 :type fixnum)
(ctstart 0 :type fixnum) ;; content start
(ctend 0 :type fixnum) ;; content end
(tag nil :type list))
(defun find-markup (text)
"Find all markup in text and return it as a sorted (by start pos) list of regions."
(nreverse
(iter
(for (pat . tag) in +markup-to-html+)
(for scanner = (cl-ppcre::create-scanner pat :multi-line-mode t))
(for matches = (let ((ret))
(cl-ppcre:do-scans (match-start match-end reg-starts reg-ends scanner text ret)
(push (make-markup-region :start match-start
:end match-end
:ctstart (first-elt reg-starts)
:ctend (first-elt reg-ends)
:tag tag)
ret))))
(accumulate matches by (lambda (acc l) (merge 'list acc l #'> :key #'start))))))
(defun create-tree (markup-map text &key (root-tag :body))
"Transform the output of find-markup into a complete spinneret tag tree."
(car
(create-tree/impl
(cons
(make-markup-region
:start 0
:end (length text)
:ctstart 0
:ctend (length text)
:tag (list root-tag))
markup-map)
text)))
(defun create-tree/impl (markup-map text)
(macrolet ((extract (start end)
`(if (/= ,start ,end)
(push (subseq text ,start ,end) ret))))
(iter
(with root = (car markup-map))
(with xstart = (ctstart root))
(with next-cons = (cdr markup-map))
(with prev)
(with ret = (reverse (tag root)))
(while next-cons)
(for next = (car next-cons))
(while (<= (end next) (ctend root)))
(extract xstart (start next))
(destructuring-bind (child . new-next-cons) (create-tree/impl next-cons text)
(setf prev next
next-cons new-next-cons
next (car next-cons)
xstart (end prev))
(push child ret))
(while next-cons)
(finally
(extract xstart (ctend root))
(return (cons (nreverse ret) next-cons))))))
(defun print-tree (node &key (depth 0) (stream t))
(flet ((preprocess (obj)
(if (stringp obj) (substitute #\$ #\Newline obj) obj)))
(format stream "~&~0,vT(~S" (* depth 2) (preprocess (car node)))
(dolist (child (cdr node))
(if (consp child)
(print-tree child :depth (+ depth 1) :stream stream)
(format stream "~&~0,vT~S" (* (+ depth 1) 2) (preprocess child))))
(format stream ")")
(if (zerop depth)
(format stream "~%"))))
(if (equal uiop:*command-line-arguments* '("-h"))
(uiop:die 0 "Usage: ~A [TITLE] <DOC.txt >DOC.html"
(file-namestring #.(or *compile-file-truename* *load-truename*))))
(let* ((text (read-stream-content-into-string *standard-input*))
(markup-map (find-markup text))
(tree (create-tree markup-map text :root-tag :pre))
(spinneret:*html-style* :tree)
(*print-pretty* nil))
;; (print-tree tree :stream *debug-io*)
(spinneret:with-html (:doctype))
(spinneret:interpret-html-tree
`(:html
(:head
(:title ,(car (or uiop:*command-line-arguments* '("Title"))))
(:link :rel "stylesheet" :href "gtext.css"))
(:body
,tree))))
#!/usr/bin/env tclsh
proc html_encode {text} {
string map {
& {&amp;}
< {&lt;}
> {&gt;}
\" {&quot;}
' {&#x27;}
} $text
}
# Ungodly codegen abomination, but it works
proc apply_markup {line} {
set line [string map {\\ \\\\ \[ \\\[ \] \\\]} $line]; # Escape for future subst
foreach {pat repl} {
{^<.*} {<span class="pink">[html_encode {&}]</span>}
{^>.*} {<span class="green">[html_encode {&}]</span>}
{^={2}(.*?)={2}$} {<span class="heading">[html_encode {\\1}]</span>}
{^[^<>].*} {[html_encode {&}]}
{\*{2}(.*?)\*{2}} {\}]<span class="spoiler">[html_encode {\\1}]</span>[html_encode \{}
{'{3}(.*?)'{3}} {\}]<b>[html_encode {\\1}]</b>[html_encode \{}
{'{2}(.*?)'{2}} {\}]<i>[html_encode {\\1}]</i>[html_encode \{}
{_{2}(.*?)_{2}} {\}]<u>[html_encode {\\1}]</u>[html_encode \{}
{~{2}(.*?)~{2}} {\}]<s>[html_encode {\\1}]</s>[html_encode \{}
} {
set repl [subst -novar -nocom $repl]; # Expand escape braces and backslashes
set line [regsub -all $pat $line $repl]
}
subst -novar $line
}
if {$argc == 1 && [lindex $argv 0] eq "-h"} {
puts stderr "Usage: [file tail [info script]] \[TITLE\] <DOC.txt >DOC.html"
exit
}
puts [string trim "
<!DOCTYPE html>
<html>
<head>
<title>[if {$argc == 1} {lindex $argv 0} {string cat Title}]</title>
<link rel='stylesheet' href='gtext.css'>
</head>
<body>
<pre>
" \n]
foreach line [split [string trim [read stdin] \n] \n] {
puts [apply_markup $line]
}
puts [string trim "
</pre>
</body>
</html>
" \n]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment