Last active
February 5, 2024 17:21
-
-
Save chtenb/35410c6c66581cf5fd81ff1be697da9a to your computer and use it in GitHub Desktop.
koka segfault
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import std/core/unsafe | |
import std/core/undiv | |
pub fun debug(msg : string) : total () | |
unsafe-total({println(msg)}) | |
pub effect yield<a> | |
ctl yield(elem : a) : () | |
pub fun iter(g : () -> <yield<a>|e> ()) : iter<a,e> | |
Iter({ | |
handle g | |
return(_) Nothing | |
ctl yield(a) | |
Just((a, Iter(fn() resume(())))) | |
}) | |
type iter<a,e> | |
Iter(cont : () -> e maybe<(a,iter<a,e>)>) | |
fun next(it : iter<a,e>) : e maybe<(a,iter<a,e>)> | |
debug("next") | |
(it.cont)() | |
pub fun is-empty(it : iter<a,e>) : e bool | |
it.next.is-nothing | |
pub fun iter/list(it : iter<a,<div|e>>) : <div|e> list<a> | |
match it.next | |
Nothing -> Nil | |
Just((elem, rest)) -> Cons(elem, rest.list) | |
// Stream of tokens to be parsed. | |
// The stream has to be deterministic, in the sense that a stream generating function | |
// can be stored and will always yield the same tokens when called. | |
pub alias input<t> = iter<t,<>> | |
pub alias parse<t,e> = <div,parse-effect<t>|e> | |
pub alias parser<t,a,e> = () -> parse<t,e> a | |
pub type parse-result<t,a> | |
ParseOk(result: a, rest : input<t>) | |
ParseError(msg : string, rest : input<t>) | |
pub effect parse-effect<t> | |
fun satisfy(pred : input<t> -> total maybe<(a,input<t>)>) : maybe<a> | |
ctl fail(msg : string) : a | |
ctl pick() : bool | |
fun get-input() : input<t> | |
fun set-input(input : input<t>) : () | |
pub fun parse(input0 : input<t>, p : parser<t,a,e>) : <div|e> parse-result<t,a> | |
var input := input0 | |
handle p | |
return(x) | |
ParseOk(x,input) | |
fun get-input() | |
input | |
fun set-input(new) | |
input := new | |
final ctl fail(msg) | |
ParseError(msg,input) | |
fun satisfy(pred) | |
debug("satisfy") | |
val inp = input | |
val m = fn() : total _ { pred(inp) } | |
match m() | |
Just((x,cap)) -> { input := cap; Just(x) } | |
Nothing -> Nothing | |
ctl pick() | |
val save = input; | |
match resume(True) | |
ParseOk(x1,rest1) -> ParseOk(x1,rest1) | |
err1 -> | |
input := save | |
match resume(False) // todo: limit lookahead? | |
ParseOk(x2,rest2) -> ParseOk(x2,rest2) | |
_err2 -> err1 // todo: merge or pick closest? | |
pub fun many(p : parser<t,a,e>) : parse<t,e> list<a> | |
val input = get-input() | |
val (acc, rest) = mask<parse-effect> | |
many-rec(p, input, []) | |
set-input(rest) | |
acc.reverse | |
fun many-rec(p : parser<t,a,e>, input : input<t>, acc : list<a>) : <div|e> (list<a>, input<t>) | |
match parse(input, p) | |
ParseOk(x, rest) -> many-rec(p, rest, Cons(x, acc)) | |
ParseError(_, _) -> (acc, input) | |
pub fun many1(p : parser<t,a,e>) : parse<t,e> list<a> | |
debug("many1") | |
Cons(p(), many(p)) | |
// ---------------------------- FROM STD ------------------------------------ | |
pub fun (||)(p1 : parser<t,a,e>, p2 : parser<t,a,e>) : parse<t,e> a | |
if pick() then p1() else p2() | |
pub fun satisfy-fail(msg : string, pred : input<t> -> maybe<(a,input<t>)>) : parse<t,e> a | |
debug("satisfy-fail") | |
match satisfy(pred) | |
Nothing -> fail(msg) | |
Just(x) -> x | |
pub fun token-is(msg : string, pred : t -> bool) : parse<t,e> t | |
debug("token-is") | |
satisfy-fail(msg) fn(input) | |
match input.next | |
Nothing -> Nothing | |
Just((t, rest)) -> if pred(t) then Just((t,rest)) else Nothing | |
pub fun char(c : char) : parse<char,total> char | |
token-is(show(c), fn(c0) c == c0 ) | |
pub fun none-of(chars : string) : parse<char,total> char | |
debug("none-of") | |
token-is("", fn(c : char) !chars.contains(c.string)) | |
// ---------------------- TOKENIZATION ---------------------------- | |
fun sslice/tokenize(s : sslice) : yield<char> () | |
s.foreach fn(c) | |
yield(c) | |
fun string/tokenize(s : string) : yield<char> () | |
s.slice.tokenize | |
fun list/tokenize(l : list<a>) : yield<a> () | |
l.foreach fn(t) | |
yield(t) | |
// Necessary because otherwise the effect variable is interpreted differently or something? | |
pub fun iter/is-empty'(it : iter<a,<>>) : bool | |
it.next.is-nothing | |
fun parser/tokenize(p : parser<t,a,<>>, input : input<t>, show : a -> string) : <div,exn,yield<a>> () | |
debug("tokenize") | |
match parse(input, p) | |
ParseOk(a, rest) -> | |
// TODO: assert advance | |
debug("parse ok: " ++ a.show) | |
yield(a) | |
tokenize(p, rest, show) | |
ParseError(msg, rest) -> | |
debug("parse err") | |
if !rest.is-empty' then | |
throw("Error during tokenization: " ++ msg) | |
// ---------------------- EXAMPLES ---------------------------- | |
pub value type dsv-token | |
Value(s : string) | |
Sep | |
Newline | |
pub fun dsv/show(t : dsv-token) : string | |
match t | |
Value(s) -> "val: " ++ s | |
Sep -> " | " | |
Newline -> " lf\n" | |
pub fun dsvlist/show(dsv : list<dsv-token>) : string | |
dsv.map(show).join() | |
fun parse-tsv-value() : parse<char,total> dsv-token | |
debug("parse-tsv-value") | |
val chars = many1({ none-of("\t\r\n") }) | |
debug("parsed " ++ chars.string) | |
Value(chars.string) | |
fun parse-tsv-sep() : parse<char,total> dsv-token | |
debug("parse-tsv-sep") | |
val _ = char('\t') | |
Sep | |
fun parse-newline() : parse<char,total> dsv-token | |
debug("parse-tsv-newline") | |
val _ = char('\n') | |
Newline | |
fun parse-tsv-token() : parse<char,total> dsv-token | |
parse-tsv-value || ({parse-tsv-sep || parse-newline}) | |
pub fun tokenize-tsv(input : input<char>) : <div,exn,yield<dsv-token>> () | |
tokenize(parse-tsv-token, input, show) | |
pub fun main() | |
val input = iter({"1".tokenize}) | |
val result = iter(fn() tokenize-tsv(input)).list | |
println("Output:") | |
println(result.show) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment