Last active
September 28, 2018 15:22
-
-
Save talyian/8b9bb45ea7043eab038000de741a175f to your computer and use it in GitHub Desktop.
POC for New Breve Idea
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Breve was designed as a backwards-compatible Blitz replacement that does safe context-aware sanitization. | |
The current architecture uses Blitz's parser frontend + Latte as a context-aware backend to achieve this goal. This was because I figured writing a context-aware HTML+macro parser was harder than re-implementing Blitz's logic flows (blocks, variable lookups, partial templates, etc.) in Latte. | |
As it turns out, reimplementing logic is a difficult problem. An alternate solution would be to write our own Blitz-compatible frontend that just adds context-aware output and delegates to Blitz for the logic implementation. This is a Proof of concept that shows it isn't that bad in a language that makes writing parsers easy. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
open System.Text.RegularExpressions | |
open System.Collections.Generic | |
type HtmlToken = | Id | Whitespace | FullComment | Macro | Sym | CommentStart | Begin | Info | |
with override this.ToString() = sprintf "%A" this | |
type HtmlContext = | |
| Html | Comment | |
| Tag | TagBegin | TagCloseBegin | |
| TagEnding | TagAttr1 | TagAttrval | |
| TagStringD | TagStringS | |
with override this.ToString() = sprintf "%A" this | |
type JsContext = | |
| Regex | JsComment | SingleQuote | DoubleQuote | JsNone | |
with override this.ToString() = sprintf "%A" this | |
module HTMLTokens = | |
let tokenize str = | |
let tokens = Regex.Matches(str, @"(\w+)|(\s+)|(<!--.*?-->)|({{.*?}})|(.)", RegexOptions.Singleline) | |
let tok (token:Match) = seq { | |
let str = token.Groups.[0] | |
let tokentype = | |
if token.Groups.[1].Length > 0 then HtmlToken.Id | |
elif token.Groups.[2].Length > 0 then Whitespace | |
elif token.Groups.[3].Length > 0 then FullComment | |
elif token.Groups.[4].Length > 0 then Macro | |
else Sym | |
if tokentype = FullComment then yield ("", CommentStart, str.Index) | |
yield (str.Value, tokentype, str.Index) | |
} | |
[for m:Match in tokens do yield! tok m] | |
// Calculate HTML lexer context in a token stream -- this extracts lexical html state like tags, attributes, and text | |
// given token in a stream and a previous context, return the new context | |
let contextmap context (token, toktype, idx) = | |
match context, token, toktype with | |
| (Html, "<", _) -> TagBegin | |
| (Html, _, CommentStart) -> Comment | |
| (Comment, _, FullComment) -> Html | |
| (Html, _, _) -> Html | |
| (TagBegin, "/", _) -> TagCloseBegin | |
| (TagBegin, tag, HtmlToken.Id) -> Tag | |
| (TagBegin, macro, Macro) -> Tag | |
| (TagCloseBegin, tag, HtmlToken.Id) -> TagEnding | |
| (Tag, attrname, HtmlToken.Id) -> TagAttr1 | |
| (Tag, macro, Macro) -> TagAttr1 | |
| (Tag, ">", _) -> Html | |
| (Tag, "/", _) -> TagEnding | |
| (TagEnding, ">", _) -> Html | |
| (TagAttr1, "-", _) -> Tag | |
| (TagAttr1, "=", _) -> TagAttrval | |
| (TagAttr1, _, Whitespace) -> TagAttr1 | |
| (TagAttr1, _, HtmlToken.Id) -> TagAttr1 | |
| (TagAttr1, ">", _) -> Html | |
| (TagAttr1, "/", _) -> TagEnding | |
| (TagAttrval, "\"", _) -> TagStringD | |
| (TagAttrval, "'", _) -> TagStringS | |
| (TagAttrval, _, Whitespace) -> TagAttrval | |
| (TagAttrval, ">", _) -> Html | |
| (TagAttrval, value, _) -> TagAttrval | |
| (TagStringD, "\"", _) -> Tag | |
| (TagStringD, _, _) -> TagStringD | |
| (TagStringS, "'", _) -> Tag | |
| (TagStringS, _, _) -> TagStringS | |
| (n, tagInfo, Info) -> n | |
| (n, _, Macro) -> n | |
| (n, _, Whitespace) -> n | |
| (n, _, _) -> failwithf "invalid transition: %A" (n, idx) | |
let withTag cmap state y = | |
let (_, _oldcontext, _oldtoken) = state | |
let newcon = cmap _oldcontext y | |
// printfn "%A" (_oldcontext, newcon, y) | |
(_oldcontext, newcon, y) | |
let context_and_token tokens = List.scan (withTag contextmap) (Html, Html, ("", Begin, 0)) tokens | |
let elementStream token_stream = | |
let mutable context = "html" | |
let mutable curtag = "" | |
let pushElement tag = curtag <- tag; if tag = "script" then context <- "js-beginning" | |
let popElement token = | |
let tag = curtag | |
curtag <- "" | |
match context, tag, token with | |
| "html", "script", _ -> Some("js-beginning") | |
| "js-beginning", "script", ">" -> Some("js") | |
| "js", "/script", _ -> Some("html") | |
| _ -> None | |
let mutable curattr = "" | |
let pushAttr name = curattr <- name | |
let startAttr () = | |
let name = curattr.ToLower() | |
if context = "html" then | |
if name.Length > 1 && name.[0..1] = "on" then Some("js-attr") | |
elif name = "href" then Some("url-attr") | |
elif name = "src" then Some("url-attr") | |
else Some("attr") | |
else None | |
let clearAttr () = | |
match context with | |
| "attr" | |
| "js-attr" | |
| "url-attr" -> context <- "html" | |
| _ -> () | |
seq { | |
for tok in token_stream do | |
let prev_context = context | |
match tok with | |
// managing tag stack | |
| TagBegin, Tag, (name, HtmlToken.Id, i) -> pushElement name | |
| TagCloseBegin, TagEnding, (name, HtmlToken.Id, i) -> pushElement ("/" + name) | |
| Tag, Html, _ | |
| TagEnding, Html, _ | |
| TagAttr1, Html, _ -> | |
match popElement(let (a,b,(c,d,i)) = tok in c) with | Some(c) -> context <- c | None -> () | |
// attribute stack | |
| Tag, TagAttr1, (name, HtmlToken.Id, i) -> pushAttr name | |
| TagAttr1, TagAttrval, _ -> match startAttr() with | Some(c) -> context <- c | None -> () | |
| _, Tag, _ -> clearAttr(); | |
| _ -> () | |
yield prev_context, context, tok } | |
// get js-quote context | |
let jsStream element_stream = seq { | |
let mutable context = JsNone | |
for element in element_stream do | |
let (c1, c2, (tok1, tok2, token)) = element | |
match c1, c2, (tok1 = tok2) with | |
| "js", "js", true | |
| "js-attr", "js-attr", true -> | |
let t, _, i = token | |
match context, t with | |
| JsNone, "'" -> context <- SingleQuote | |
| JsNone, "\"" -> context <- DoubleQuote | |
| SingleQuote, "'" -> context <- JsNone | |
| DoubleQuote, "\"" -> context <- JsNone | |
| _ -> () | |
| _ -> () | |
yield context, element | |
} | |
type BlitzTemplate (content) = | |
let htmltokens = HTMLTokens.tokenize content | |
let htmlcontexts = HTMLTokens.context_and_token htmltokens | |
let jscontexts = htmlcontexts |> HTMLTokens.elementStream |> HTMLTokens.jsStream |> Seq.toList | |
member this.Content = content | |
member val HtmlContexts = HTMLTokens.context_and_token htmltokens | |
member val JsContexts = htmlcontexts |> HTMLTokens.elementStream |> HTMLTokens.jsStream |> Seq.toList | |
let action = Environment.GetCommandLineArgs() |> Seq.rev |> Seq.item 1 | |
let filename = Environment.GetCommandLineArgs() |> Seq.last | |
let content = filename |> IO.File.ReadAllText | |
let template = BlitzTemplate(content) | |
let print_tokens (template:BlitzTemplate) = Seq.iter (printfn "%O") template.JsContexts | |
let print_escaped (template:BlitzTemplate) = | |
for jstype, (html1, html2, (xml1, xml2, (t, ttype, pos))) in template.HtmlContexts |> HTMLTokens.elementStream |> HTMLTokens.jsStream do | |
let htmlContext = match xml2 with | |
| TagAttrval -> "attrval" | |
| TagAttr1 -> "attrname" | |
| Tag -> "tagname" | |
| Html -> "html" | |
| TagStringD -> "attrdstring" | |
| TagStringS -> "attrsstring" | |
| Comment -> "comment" | |
| _ -> "unknown" | |
let htmlContext = if html2 = "js" then "cdata" else htmlContext | |
let semanticContext = match jstype, html2 with | |
| _, "url-attr" -> "url" | |
| JsComment, _ -> "jscomment" | |
| SingleQuote, _ -> "jsstring" | |
| DoubleQuote, _ -> "jsstring" | |
| JsContext.Regex, _ -> "jsregex" | |
| JsNone, "js" -> "js" | |
| JsNone, "js-attr" -> "js" | |
| _ -> "none" | |
let m = Regex.Match(t, "{{(\$[\w.]+)}}") | |
if m.Success then | |
printf "\x1b[0;32m"; | |
if semanticContext = "none" then | |
printf "{{%O(%O)}}" htmlContext (m.Groups.[1]) | |
elif htmlContext = "cdata" then | |
printf "{{%O(%O)}}" semanticContext (m.Groups.[1]) | |
else | |
printf "{{%O(%O(%O))}}" htmlContext semanticContext (m.Groups.[1]) | |
printf "\x1b[0;m"; | |
else | |
printf "%s" t | |
print_escaped template |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
docker-run: | |
docker run --rm -v ${PWD}:/work fsharp bash -c 'cd /work; fsharpc everb.fs --nowarn:58 --nologo && cli everb.exe example.tpl' | |
run: | |
fsharpc everb.fs --nowarn:58 && mono everb.exe example.tpl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
jimmyt@:~/git/eat/everb (master) $ make | |
docker run --rm -v /Users/jimmyt/git/eat/everb:/work fsharp bash -c 'cd /work; fsharpc everb.fs --nowarn:58 --nologo && cli everb.exe example.tpl' | |
{{html($html)}} | |
<A {{attrname($tag)}}/> | |
<{{tagname($tag)}} /> | |
<div {{attrname($tag)}}={{attrval($tagvalue)}} b = {{attrval($tagvalue)}}/> | |
<A a = {{attrval($val)}} b={{attrval($val)}} /> | |
<B a = '{{attrsstring($val)}}' b ="{{attrdstring($val)}}" /> | |
<I a='{{attrsstring($val)}}' b ="{{attrdstring($val)}}" /> | |
<A href='{{attrsstring(url($url))}}' href={{attrval(url($url))}} /> | |
<B onclick={{attrval(js($js))}} onclick="{{attrdstring(js($js))}}='{{attrdstring(jsstring($js2))}}'" /> | |
<script> | |
var x = {{js($js)}}; | |
var y = "{{jsstring($jss)}}"; | |
// var z = {{js($comment)}}; | |
var w = /foo["']{{jsstring($regex)}}/g; | |
</script> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment