Skip to content

Instantly share code, notes, and snippets.

@sir-deenicus
Last active January 1, 2016 12:09
Show Gist options
  • Save sir-deenicus/8143136 to your computer and use it in GitHub Desktop.
Save sir-deenicus/8143136 to your computer and use it in GitHub Desktop.
Hyphenation algorithm used by TeX.
/// Trie of hyphenation patterns, keyed one pattern character per level.
type HyphTree =
    /// A trie node. The first component holds the hyphenation point values
    /// when a pattern ends exactly at this node (None otherwise); the second
    /// maps the next pattern character to the corresponding subtree.
    | Node of int[] option * Map<char, HyphTree>
    /// An absent subtree; also usable as the initial, unpopulated tree.
    | Empty
/// Inserts a single hyphenation pattern (for example "a1b" or ".ab1") into
/// `tree` and returns the updated trie.
///
/// The pattern's non-digit characters form the path through the trie; the
/// digits between them become the point values stored on the final node.
/// Inserting a pattern whose path already exists replaces the points stored
/// there but keeps every longer pattern that passes through the same node.
let insertpattern tree (pattern: string) =
    // Trie path: the pattern with its numeric characters removed.
    let chars = pattern |> Seq.filter (Char.IsNumber >> not) |> Seq.toArray
    // A piece that is not a number (i.e. a gap with no digit) counts as 0.
    let parsePoint (piece: string) =
        match Int32.TryParse piece with
        | true, value -> value
        | _ -> 0
    // Splitting on every '.'/a-z character leaves one piece per gap around the
    // path characters, so for patterns made only of '.', a-z and digits this
    // yields chars.Length + 1 values.
    let points =
        Text.RegularExpressions.Regex.Split(pattern, "[.a-z]")
        |> Array.map parsePoint
    let rec insertAt subtree depth =
        let existingPoints, children =
            match subtree with
            | Node(p, m) -> p, m
            | Empty -> None, Map.empty
        if depth = chars.Length then
            // End of the pattern: store its points here. The existing children
            // must be preserved, otherwise inserting a pattern that is a prefix
            // of earlier ones would silently drop those longer patterns.
            Node(Some points, children)
        else
            // NOTE(review): mapAddGeneric is defined outside this block; from
            // this call it presumably rewrites the child under chars.[depth]
            // with the function when present and inserts the last argument
            // otherwise - confirm against its definition.
            Node(existingPoints,
                 mapAddGeneric children chars.[depth]
                     (fun child -> insertAt child (depth + 1))
                     (insertAt Empty (depth + 1)))
    insertAt tree 0
/// Splits `worda` into hyphenation pieces using the pattern trie `tree`.
///
/// Returns the pieces in order as an array. The pieces are cut from the
/// result of `tolower worda`, not from `worda` itself, and a word of four
/// characters or fewer is returned as a single piece. A break is never placed
/// directly after the first character or directly before the last one.
let hyphenate tree (worda:string) =
    let word = tolower worda
    if word.Length <= 4 then [|word|]
    else
        // Patterns use '.' to anchor to the start or end of a word.
        let usedword = "." + word + "."
        // ps.[k] is the strongest point value seen for the gap just before
        // usedword.[k]; the extra slot covers the gap after the last character.
        let ps = Array.create (usedword.Length + 1) 0
        // Merge one pattern's point values into ps, keeping the maximum per gap.
        let mergePoints start (p: int[]) =
            for k in 0..p.Length - 1 do
                ps.[start + k] <- max ps.[start + k] p.[k]
        // Follow usedword from `start` down the trie. Points are merged on
        // entering each node, so the node reached by the final character is
        // included; merging only before the next lookup would skip it and
        // ignore every pattern that ends at the trailing '.'.
        let rec walk start offset node =
            match node with
            | Empty -> ()
            | Node(optpoint, children) ->
                Option.iter (mergePoints start) optpoint
                if start + offset < usedword.Length then
                    match Map.tryFind usedword.[start + offset] children with
                    | Some child -> walk start (offset + 1) child
                    | None -> ()
        for i in 0..usedword.Length - 1 do
            walk i 0 tree
        // Forbid a break after the first letter, before the last letter, and
        // after the last letter (which would produce an empty final piece).
        ps.[2] <- 0
        ps.[ps.Length - 2] <- 0
        ps.[ps.Length - 3] <- 0
        // usepoint.[j] is the gap immediately after word.[j].
        let usepoint = ps.[2..]
        // Append each character to the current piece; an odd point value after
        // the character closes the piece. Finished pieces are kept newest-first.
        let step (finished: string list, current: string, idx: int) (c: char) =
            let grown = current + string c
            if usepoint.[idx] % 2 = 1 then (grown :: finished, "", idx + 1)
            else (finished, grown, idx + 1)
        let sylls, lastsyll, _ = Seq.fold step ([], "", 0) word
        lastsyll :: sylls |> List.rev |> List.toArray
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment