Last active
May 4, 2020 05:37
-
-
Save Skrylar/179648d5856ae49087ad34a87d0740f9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import printingpress | |
import skmap/robin | |
import math | |
type | |
FlowDirection* = enum | |
LeftToRight | |
TopToBottom | |
#- | |
BoxKind* = enum | |
Container ## Box is meant to contain other boxes. | |
Glyph ## Box contains an actual glyph. | |
#- | |
BoxFlag* = enum | |
Potato | |
BoxFlags* = set[BoxFlag] | |
#- | |
Box* = object | |
kind*: BoxKind ## What secrets does this box keep? | |
flags*: BoxFlags | |
flow*: FlowDirection ## What direction do items go? | |
width*, height*: int16 ## Size of the box contents. | |
glyph*: int16 ## Which glyph this box renders from the typeface (for glyph boxes.) | |
postglue*: int16 ## Glue space after this box | |
depth*: int16 ## How far below the baseline this box sits | |
children*: seq[Box] ## Boxes within this box (for containers.) | |
bestbreak*: int16 ## Offset to best breakpoint | |
badness*: int16 ## Calculated score to break on this box. | |
#- `FlowDirection` and `BoxKind` may get removed and folded in to | |
#- `BoxFlags` to save space. | |
#- == Recalculating bounding boxes | |
#- When first planning a paragraph the geometry of individual letters | |
#- may only be slightly known. Also when updating the contents of | |
#- various boxes the geometry needs to be recalculated. This is the | |
#- function that "trues up" the bounding boxes of boxes. | |
#- | |
#- If `deep` is `true` then children of the box will be recursively | |
#- updated as well. If `face` is a non-nil value then it will be used | |
#- to calculate the size of `Glyph` boxes. | |
proc recalculate_bounds*(self: var Box; face: ptr Typeface = nil; deep: bool = false) = | |
## Recalculates the width and height of a box based on contents. | |
case self.kind: | |
of Container: | |
# reset our dimensions; we will add up the dimensions of our children | |
self.width = 0 | |
self.height = 0 | |
# flow direction makes a difference | |
case self.flow | |
of LeftToRight: | |
for i in 0..<self.children.len: | |
template ch: untyped = self.children[i] | |
if deep: | |
recalculate_bounds(ch, face, true) | |
inc self.width, ch.width | |
inc self.width, ch.postglue | |
self.height = max(self.height, ch.height) | |
of TopToBottom: | |
for i in 0..<self.children.len: | |
template ch: untyped = self.children[i] | |
if deep: | |
recalculate_bounds(ch, face, true) | |
inc self.height, ch.height | |
inc self.height, ch.postglue | |
self.width = max(self.width, ch.width) | |
of Glyph: | |
# we just load dimensions off the typeface | |
if face != nil: | |
let g = face.glyphs[self.glyph.int] | |
self.width = g.region.width.int16 | |
self.height = g.region.height.int16 | |
self.depth = g.offset_y.int16 | |
self.postglue = (g.advance - self.width) | |
#- == Creating a single paragraph | |
#- `set_to_ansi` will break apart the supplied string as though it | |
#- were a single paragraph. Container boxes are added which in turn | |
#- hold glyph boxes for each letter. Whitespace is converted to glue | |
#- (stretchable space) between words. | |
proc set_to_ansi*(self: var Box; paragraph: string) = | |
## Clears the box and creates sub-boxes to represent a given string. | |
## Does not do anything intelligent with Unicode; don't even try. | |
proc whitespace(c: char): bool = | |
result = (c == ' ') | |
# need to transmute lead in to gold | |
self.kind = Container | |
self.children.set_len(0) | |
self.flow = LeftToRight # XXX some languages would disagree | |
# clear unrelated things | |
self.width = 0 | |
self.height = 0 | |
self.depth = 0 | |
var i = 0 | |
var wordy = whitespace(paragraph[0]) | |
var current_word = Box() | |
while i < paragraph.len: | |
if wordy: | |
# collect glyphs within a word | |
if not whitespace(paragraph[i]): | |
var letter = Box() | |
letter.kind = Glyph | |
letter.glyph = paragraph[i].int16 | |
current_word.children.add(letter) | |
else: | |
wordy = false | |
continue | |
else: | |
# convert whitespace to postglue | |
if whitespace(paragraph[i]): | |
current_word.postglue += 1 # <1> | |
else: | |
wordy = true | |
self.children.add(current_word) | |
current_word = Box() | |
continue | |
inc i | |
if current_word.children.len > 0: | |
self.children.add(current_word) | |
#- <1> We would ideally use the width of the font's actual space | |
#- character here but we do not know what it is. So we have to put the | |
#- number of spaces which *should* be here such as, a single space, | |
#- multiple spaces for paragraph indentions, end of sentence spaces and | |
#- so forth. You have to resort to `hydrate_spaces` to correct the glue | |
#- here. | |
#- | |
#- == Hydration of whitespace | |
#- If your paragraph construction proc does not know the absolute amount | |
#- of spacing that belongs between letters it must use relative spacing. | |
#- You can use this function to multiply that relative spacing by the | |
#- width of your typeface's space character to correct that. | |
proc hydrate_spaces*(self: var Box; space_width: int) = | |
## Multiplies the `postglue` of every contained box by the given | |
## `space_width`. | |
for i in 0..<self.children.len: | |
self.children[i].postglue *= space_width.int16 | |
#- == Line Breaking | |
#- Accepts a box of words and fits them to a given width. New lines | |
#- are added at various points which are called "line breaks." Several | |
#- algorithms for this exist. Some of them are fast but produce ugly | |
#- (but servicable) output. Some are more expensive but create type that | |
#- is comparable to professionally published books. | |
#- | |
#- === Greedy Line Break | |
#- . Greedy assumes you have a maximum width and a series of words. | |
#- . You pop a word from the start of your paragraph and consider adding it to the current line. | |
#- . If adding it to the line would not cause the line to become long, then do so and return to 2. | |
#- . Otherwise you must commit the current line and start a new one that starts with this word. | |
#- . When you run out of words to pop you commit the remaining line. | |
#- | |
#- This algorithm is simple and fast but produces paragraphs that are not | |
#- "print quality." | |
proc greedy_break*(box: Box; width: int): Box = | |
#- .Set up the vbox | |
result.kind = Container | |
result.flow = TopToBottom | |
result.children.set_len(0) | |
#- .Resetting the current hbox | |
var this_line: Box | |
template reset_line(line: var untyped) = | |
line = Box() | |
line.kind = Container | |
reset_line(this_line) | |
#- .Fit as many words to single lines as possible | |
var w = 0 | |
for b in box.children: | |
if (w + b.width) > width: | |
# remove glue from end of line | |
this_line.children[this_line.children.high].postglue = 0 | |
# XXX we may not actually need to do this | |
# commit current line and begin next one | |
this_line.recalculate_bounds | |
result.children.add(this_line) | |
reset_line(this_line) | |
w = 0 | |
# now add new content to the line | |
inc w, b.width | |
inc w, b.postglue | |
this_line.children.add(b) | |
#- .Add final line, if needed | |
if this_line.children.len > 0: | |
# remove glue from end of line | |
this_line.children[this_line.children.high].postglue = 0 | |
this_line.recalculate_bounds | |
result.children.add(this_line) | |
result.recalculate_bounds | |
#- | |
#- === Pretty Line Breaking | |
#- Pretty line breaking uses a combination of TeX's "dynamic programming" | |
#- based algorithm <<Knuth-Plass>>, coupled with optimizations from | |
#- <<Kenninga>>, <<jaroslov>> and Skrylar's own tweaking. | |
#- | |
#- | |
#- | |
#- ==== Planning (internal) | |
#- Knuth-Plass line breaking is recursive, so we model the majority of its | |
#- implementation as a function which can call itself. | |
#- | |
#- Some implementations construct trees instead of performing recursion. | |
proc knuthplass_break_inner(box: var Box; start, horizon, width, margin: int; wall: int = int.high) = | |
if horizon == 0: | |
box.children[start].badness = 0 | |
box.children[start].bestbreak = int16.high | |
return | |
#- .Greedy fill the line until we hit the margin. | |
var w = 0 | |
var i = start | |
while i < box.children.len: | |
if ((w + box.children[i].width) > width) and (w != 0): break | |
inc w, box.children[i].width | |
inc w, box.children[i].postglue # questionable | |
inc i | |
if w > margin: break | |
dec w, box.children[i-1].postglue | |
var best_index, best_score, barrier: int | |
#- .Calculate a line's badness score from its current width | |
proc score(w: int): int = | |
return pow(abs(margin - w).float, 2).int | |
#- .Check if there are words on the line after this. | |
if i < box.children.len: | |
#- .Use the obvious linebreak as a baseline for scoring | |
knuthplass_break_inner(box, i, horizon-1, width, margin) | |
best_index = i | |
best_score = box.children[i].badness | |
barrier = best_score + score(w) | |
dec i | |
#- .Start cutting back words until quality stops improving | |
while i > start: | |
knuthplass_break_inner(box, i, horizon-1, width, margin, barrier) | |
# DISQUALIFIED | |
if box.children[i].badness > wall: | |
return | |
# improvement | |
if box.children[i].badness < best_score: | |
best_score = box.children[i].badness | |
best_index = i | |
dec w, box.children[i].width | |
dec i | |
if i >= 0: | |
dec w, box.children[i].postglue | |
# hot garbage | |
else: | |
break | |
#- .We filled the end of the paragraph. | |
else: | |
best_index = box.children.len | |
#- .Commit best scores. | |
box.children[start].badness = barrier.int16 | |
box.children[start].bestbreak = best_index.int16 | |
#- ==== Breaking (public) | |
#- | |
#- `margin` is the *desired* width of text. Greedy line wrapping will occur | |
#- until one of two things happens: | |
#- | |
#- . The line's `width` budget would be blown, or, | |
#- . The last added word went over the margin | |
#- | |
#- Then the more expensive linear programming steps are taken to see how | |
#- many words need to be deleted to form an optimal paragraph. | |
#- | |
#- `width` is the *maximum* width of text. No line may be longer than this | |
#- unless it is impossible to do otherwise. | |
#- | |
#- `horizon` is an optimization <<Kenninga>>. By default the horizon is | |
#- infinite which is safe for trusted inputs which do not change rapidly. | |
#- Setting a horizon limits how tall of a tree is evaluated to find the | |
#- best break points. `horizon` lines will be evaluated for optimality and | |
#- committed to the paragraph. Then the next batch of lines are checked | |
#- until the whole paragaph is set. | |
proc knuthplass_break*( | |
box: var Box; width, margin: int; | |
horizon: int = int.high): Box = | |
# start by setting every break to maximal badness | |
for i in 0..box.children.high: | |
box.children[i].badness = int16.high | |
box.children[i].bestbreak = int16.high | |
var this_line: Box | |
result = Box() | |
result.flow = TopToBottom | |
template reset_line(line: var untyped) = | |
line = Box() | |
line.kind = Container | |
line.flow = LeftToRight | |
reset_line(this_line) | |
var i = 0 | |
while i < box.children.len: | |
# evaluate break points | |
knuthplass_break_inner(box, i, horizon, width, margin) | |
# now follow the best breaks and build our vbox | |
while i < box.children.len: | |
template b: untyped = box.children[i] | |
# breakpoint not evaluated due to horizon constraints | |
if b.bestbreak == int16.high: break | |
for j in i..<b.bestbreak: | |
this_line.children.add(box.children[j]) | |
this_line.recalculate_bounds | |
result.children.add(this_line) | |
reset_line(this_line) | |
i = b.bestbreak | |
if this_line.children.len > 0: | |
this_line.recalculate_bounds | |
result.children.add(this_line) | |
#- | |
when ismainmodule: | |
import printingpress | |
import skcbor | |
var binfile: File | |
var typeface: Typeface | |
var reader: CborReader | |
discard binfile.open("main_font.bin", fmRead) | |
reader.open_to_file(binfile) | |
reader.read(typeface) | |
reader.close() | |
var b = Box() | |
echo "step=set_to_ansi" | |
#b.set_to_ansi("I am a toast, the largest toast to ever have once been bread.") | |
b.set_to_ansi("wot do") | |
echo b | |
echo "step=hydrate_spaces" | |
b.hydrate_spaces(typeface.space_width) | |
echo "step=recalculate_bounds" | |
b.recalculate_bounds(addr typeface, true) | |
echo b | |
echo "step=linebreak" | |
#var broken = greedy_break(b, 100) | |
var broken = knuthplass_break(b, 100, 75) | |
echo broken | |
#- [bibliography] | |
#- == References | |
#- - [[[Knuth-Plass]]] Breaking Paragraphs into Lines. Donald E. Knuth, Michael F. Plass. https://jimfix.github.io/math382/knuth-plass-breaking.pdf | |
#- - [[[Kenninga]]] Optimal Line Break Determination. US Patent 6,510,441. Eric A. Kenninga. | |
#- - [[[jaroslov]]] knuth plass thoughts. Jacob N. Smith. https://github.com/jaroslov/knuth-plass-thoughts/blob/master/plass.md |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment