Skip to content

Instantly share code, notes, and snippets.

@Heimdell
Last active August 4, 2017 22:22
Show Gist options
  • Save Heimdell/d6ef88528eac60e2224654546abe8ab5 to your computer and use it in GitHub Desktop.
Save Heimdell/d6ef88528eac60e2224654546abe8ab5 to your computer and use it in GitHub Desktop.
Parser for a Llama language + sample code
/*
This is a Lazy Abstract Machine (LAM) -
- runtime representation for lazy program.
Program is represented as a graph made out of nodes, and the evaluation
is done via graph rewriting.
Node can be viewed as a container with 1 delayed value. You call node.value()
to ensure the value is forced to compute.
This is better - in terms of representation - than trampolining
(returning a "() => f(x, y)" and "while (val is Function) { val = val() }"),
but is slower. However, speed is not our concern on this stage
(although I think current representation is pretty fast).
*/
/*
I understand why this shit is hidden. But I need this shit.
*/
function show (x) {
let util = require('util')
return util.inspect(x, {depth: null})
}
/*
This is the class for lazy code node.
It instance is a lazy container for a value.
Every point of program can be represented as a value. And we make use of
js functions so we don't have to implement substitution/reduction mechanics.
*/
class Node {
/*
This factory produces a Node with pre-existing value, so it isn't forced
but immidiately returned on .value() call.
*/
static immediate(result) {
return new Node({result})
}
/*
This factory produces a Node with no value at the start;
the call to .value() will evaluate thunk(), store its value in .result and return it.
*/
static thunk(thunk) {
return new Node({thunk})
}
constructor(state) {
Object.assign(this, state)
}
/*
This call retrieves value.
If the Node is constructed as immediate,
the value passed is returned. Otherwise, the thunk is evaluated
and its result cached and returned.
*/
value() {
/*
There are 2 cases when we must reduce the node:
1) if its result is undefined;
2) if its previous reduction returned another node.
*/
while (this.result === undefined || this.result instanceof Node) {
if (this.result === undefined) {
Node.reductions = Node.reductions || 0
Node.reductions++
this.result = this.thunk()
continue
}
// thunk() returned another node. "Move this" onto it and restart cycle.
this.thunk = this.result.thunk
this.result = this.result.result
}
return this.result
}
}
let {immediate, thunk} = Node
/*
Functions are constants. When function is called, it can produce fresh,
unevaluated nodes again.
If some of the nodes inside the function SHOULD NOT be evaluated multiple times
(constant expr independent of the function arguments), they should be extracted
to the outside of the function. So
> addCrap(a) = foo * bar + a
will become
> addCrap =
> let
> ; __freshVar_42 = foo * bar
> in (a) -> __freshVar_42 * a
The let-expression is an unevaluated (yet) node, so it will make it only force
(foo * bar) once.
This optimisation should be authomatically done by compiler.
*/
function func(x) {
return immediate(x)
}
/*
Lazy call node. It only forces a function to evaluate - its the function
business to force the arguments (if they are needed).
*/
function call (f, ...xs) {
return thunk(() => {
let func = f.value()
if (! (func instanceof Function)) {
throw Error("call: calling non-function - " + show(f))
}
return func(...xs)
})
}
class Record {
constructor(fields) {
Object.assign(this, fields)
}
}
/*
Pattern-matcher. Very basic - it allows to determine only the outermost record
subtype. All the
> case list of
> | Push(Just(1), xs) -> ...
will be done through ifs (== 1?) inside matcher (Just?) inside matcher (Push?).
*/
function match (o, pats) {
return thunk(() => {
let ov = o.value()
if (! (ov.constructor <= Record))
throw Error("Matching non-ADT construct, but: " + o.value())
let fun = pats[ov.constructor] || pats["*"]
/*
MAYBE I will rewrite this into a cycle (if the JS optimizer won't).
*/
return call(fun, ov)
})
}
/*
VERY important thing I almost forgot.
It is a node that on evaluation forces all the nodes from the list
to evaluate and returns the last one.
*/
function sequence (...nodes) {
return thunk(() => {
if (!nodes.length === 0) {
throw Error("sequence of zero elements is unallowed (hint: what should it return, huh?)")
}
// I hope, its the most concise (and cryptic!) way to implement this!
return nodes.reduce((_, node) => node.value())
})
}
/*
This node transformer turns node to evaluated on its creation.
*/
function force (node) {
return Node.immediate(node.value())
}
Object.assign(module.exports,
{ show
, func
, call
, match
, sequence
, immediate
, thunk
, force
, Node
, Record
})
import System.Environment
import ProgramTree
import PrettyPrint ()
import Parser
main = do
[file] <- getArgs
txt <- readFile file
let res = parse toplevel file txt
case res of
Left err -> do
putStrLn (previewError txt err)
Right yes -> do
print yes
module Parser (toplevel, parse, previewError) where
import Control.Applicative
import Control.Monad (guard)
import Data.List (intercalate)
import Text.Parsec.Error
import Text.Parsec.Pos
import Text.ParserCombinators.Parsec hiding (many, (<|>), optional)
import ProgramTree
import Util
import Debug.Trace as Debug
getInfo = do
pos <- getPosition
return (Info (sourceColumn pos) (sourceLine pos) (sourceName pos))
pinned ctor = pure ctor <*> getInfo
infixl 4 |=, |.
(|=) = (<*>)
(|.) = (<*)
toplevel = pure Module
|. spaces
|= exported decl `commentedList` ";"
|. eof
decl = empty
<|> Binding <$> binding
<|> Extend <$> extend
<|> Datatype <$> datatype
where
extend = pinned Method
|. tok "extend"
|= ctor
|= option [] (inParens (identifier `list` ","))
|= optionMaybe (tok "as" *> identifier)
|= inBracets (binding `commentedList` ";")
datatype = pinned Data
|. tok "ctor"
|= ctor
|= option [] (inParens (identifier `list` ","))
expr :: Parser AST
expr =
chord
where
term' = t $ empty
<|> var
<|> const
<|> lambda
<|> ifs
<|> match
<|> lMatch
<|> letExpr
-- <|> ctor
term = do
t <- term'
ds <- many $ do
i <- getInfo
tok "with"
ds <- inBracets (delta `commentedList` ";")
return $ \x -> With i x ds
return $ foldl (|>) t ds
delta = update <|> assign <|> remove
update = pure Modify
|= try (identifier <* tok "<-")
|= expr
assign = pure Set
|= try (identifier <* tok "=")
|= expr
remove = pure Remove
|= identifier
|. tok "removed"
var = pinned Var |= identifier
const = pinned Constant |= constant
ifs = pinned If
|. tok "if"
|= inBracets (ifAlt `list` "|")
where
ifAlt = pinned IfAlt
|= expr
|. tok "?"
|= expr
chord = do
what <- call
rest <- many $ do
i <- getInfo
op <- pinned Var |= identifier
args <- pure <$> call <|> inParens (call `list` ",")
return (Call i op . (: args))
return $ foldl (|>) what rest
lambda = pinned Lambda |= do
pinned Bind
|= pure "" -- option "" (char '@' *> identifier)
|= some (tok "\\" *> identifier `list` ",")
|. tok "->"
|= expr
|= pure []
call = do
f <- term
xs <- many action
return $ foldl (|>) f xs
where
action :: Parser (AST -> AST)
action = empty
<|> trueCall
<|> project
where
trueCall = pinned (flip . Call)
|= inParens (chord `list` ",")
project = pinned (flip . Project)
|. tok "."
|= identifier
letExpr = inParens $ pinned Let
|. optional (tok ";")
|= exported decl `commentedListEnd` ";"
|= chord
match = pinned Match
|. tok "case"
|= expr
|. tok "of"
|= inBracets (alt `list` "|")
lMatch = pinned LMatch
|. tok "function"
|= inBracets (alt `list` "|")
alt = pinned Alt
|= ctor
|= option [] (inParens (identifier `list` ","))
|. tok "->"
|= expr
constant :: Parser Constant
constant = t $ empty
<|> pure Number |= number
<|> pure String |= string
<|> pure Record |= bindings
where
bindings = inBracets $ binding `commentedList` ";"
string = do
cs <- some chunk
return (show $ "'" `intercalate` cs)
where
chunk = do
char '\''
anyChar `manyTill` char '\''
number = do
initial <- some digit
rest <- optionMaybe $ do
char '.'
rest <- many digit
return rest
return $ initial ++ case rest of
Just rest -> "." ++ rest
Nothing -> ""
exported thing = private <|> public
where
public = Exported True <$> thing
private = do
tok "private"
Exported False <$> thing
commented :: Parser a -> Parser (Commented a)
commented thing = do
comms <- many comment
it <- thing
return (Commented comms it)
where
comment = do
try $ string "---"
comm <- anyChar `manyTill` newline
spaces
return comm
binding = open <|> reexport <|> bind <|> capture
bind = do
i <- getInfo
(name, arglists) <- try $ do
name <- identifier
arglists <- many $ inParens (identifier `list` ",")
is
return (name, arglists)
body <- expr
whereBlock <- optionMaybe $ do
tok "where"
inBracets (binding `commentedList` ";")
let res = Bind i name arglists body (maybe [] id whereBlock)
return res
capture = pinned Captured
|. tok "capture"
|= inParens (identifier `list` ",")
open = pinned Open
|. tok "open"
|= expr
|. tok ":"
|= identifier `list` ","
reexport = pinned Reexport
|. tok "re-export"
|= expr
|. tok ":"
|= identifier `list` ","
t :: Parser a -> Parser a
t p = p <* spaces
inBracets p = tok "{" *> p <* tok "}"
inParens p = tok "(" *> p <* tok ")"
commentedList p sep = do
first <- do
Commented comms _ <- commented (optional (tok sep))
Commented comms' res <- commented p
return (Commented (comms ++ comms') res)
rest <- many $ do
Commented comms _ <- try $ commented (tok sep)
Commented comms' res <- commented p
return (Commented (comms ++ comms') res)
optional (tok sep)
return (first : rest)
<|> pure []
commentedListEnd p sep = do
first <- try $ do
Commented comms _ <- commented (optional (tok sep))
Commented comms' res <- commented p
return (Commented (comms ++ comms') res)
rest <- many $ try $ do
Commented comms _ <- try $ commented (tok sep)
Commented comms' res <- commented p
return (Commented (comms ++ comms') res)
tok sep
return (first : rest)
<|> pure []
list p sep = do
optional (tok sep)
res <- p `sepBy` tok sep
optional (tok sep)
return res
is = tok "=" <|> tok ":"
tok s = try $ t (string s)
identifier = don'tCare <|> projector <|> identifier'
where
don'tCare = tok "_"
projector = (:) <$> char '.' <*> identifier'
identifier' = try $ do
n <- oneOf starter
ame <- many $ oneOf follower
let name = n:ame
guard (not (reserved name))
spaces
return name
where
starter = concat [['A'..'Z'], ['a'.. 'z'], "~!@#$%^&*-+|/?><="]
follower = starter ++ ['0'.. '9']
ctor = try $ do
n <- oneOf starter
ame <- identifier
let name = n:ame
guard (not (reserved name))
spaces
return name
where
starter = concat [['A'..'Z'], ":"]
follower = starter ++ "=" ++ ['0'.. '9']
reserved = flip elem $ words
"case of function extend ctor where open capture with \\ -> | = if --- ? "
bp mark = do
input <- getInput
Debug.traceShow (mark, take 10 input) $ return ()
previewError txt err =
let
ls = lines txt
pos = errorPos err
line = sourceLine pos
col = sourceColumn pos
src = ls !! (line - 1)
before = [show (i + 1) ++ ": " ++ l | i <- [line - 4, line - 3, line - 2], let Just l = ls |? i]
after = [show (i + 1) ++ ": " ++ l | i <- [line + 0, line + 1, line + 2], let Just l = ls |? i]
pre = show line ++ ": "
xs |? i = listToMaybe (drop i xs)
listToMaybe [] = Nothing
listToMaybe (x:_) = Just x
in
unlines $ concat
[ [ showErrorMessages
"or"
"unknown parse error"
"expecting"
"unexpected"
"end of input"
(errorMessages err) ]
, before
, [ pre ++ src ]
, [ replicate (length pre + col - 1) ' ' ++ "^" ]
, after
]
{-# language LambdaCase #-}
module PrettyPrint () where
import Text.PrettyPrint
import ProgramTree
class Pretty p where
pretty :: p -> Doc
instance Show Module where
show = show . pretty
instance Pretty Module where
pretty (Module decls) = vcat (map pretty decls)
instance Pretty AST where
pretty = \case
Var _ n -> text n
Constant _ c -> pretty c
Lambda _ b -> lambda b
Call _ f xs -> call (pretty f) (map pretty xs)
LMatch _ az -> lMatch (map pretty az)
Match _ o az -> match (pretty o) (map pretty az)
Project _ o f -> project (pretty o) (text f)
Let _ bs o -> letExpr (map pretty bs) (pretty o)
If _ alts -> ifExpr (map pretty alts)
Ctor _ n os -> ctor (text n) (map pretty os)
With _ o ds -> with (pretty o) (map pretty ds)
where
lambda (Bind _ name argss body []) =
hang
(fsep (map ((text "\\" <>) . fsep . punctuate comma . map text) argss) <+> text "->")
2 (pretty body)
call f [x, y] = x <+> f <+> parens y
call f xs = f <> listOf xs
lMatch az = hang (text "function") 2
(vcat $ [lbrace] ++ az ++ [rbrace])
match o az = hang (text "case" <+> o <+> text "of") 2
(vcat $ [lbrace] ++ az ++ [rbrace])
project o f = o <> text "." <> f
letExpr bs o = vcat $ [lparen] ++ map (nest 2) (bs ++ [o]) ++ [rparen]
ifExpr az = hang (text "if") 2 (vcat $ [lbrace] ++ az ++ [rbrace])
ctor name fs = name <> listOf fs
with x ds = x <+> text "with" <+> blockOf ds
listOf = parens . fsep . punctuate comma
blockOf = braces . fsep . punctuate semi
instance Pretty a => Pretty (Exported a) where
pretty (Exported True a) = pretty a
pretty (Exported _ a) = text "private" <+> pretty a
instance Pretty a => Pretty (Commented a) where
pretty (Commented comms a) = vcat
( [text " "]
++ map ((text "---" <>) . text) comms
++ [pretty a]
)
instance Pretty Decl where
pretty = \case
Binding b -> pretty b
Extend e -> pretty e
Datatype d -> pretty d
instance Pretty Rebind where
pretty (Rebind _ a b)
| a == b = text a
| otherwise = text a <+> text "->" <+> text b
instance Pretty Datatype where
pretty (Data _ name args) = text "type" <+> text name <+> listOf (map text args)
instance Pretty Binding where
pretty (Bind _ n argss body subs) =
hang (text n <+> fsep (map (listOf . map text) argss) <+> equals) 2
$ if null subs
then pretty body
else hang (pretty body) 2 $ vcat (text "where" : map (nest 2 . pretty) subs)
pretty (Open _ x names) = text "open" <+> pretty x <+> listOf (map text names)
pretty (Captured _ names) = text "captured" <+> listOf (map text names)
instance Pretty Extend where
pretty (Method _ ctor args mname bs) =
hang
( text "methods"
<+> text ctor
<+> listOf (map text args)
<> (maybe empty (((space <> text "as") <+>) . text) mname)
<> text ":"
)
2
(vcat $ map pretty bs)
instance Pretty Constant where
pretty = \case
Number n -> text n
String s -> text s
Record fs -> vcat $ []
++ [lbrace]
++ map (nest 2 . pretty) fs
++ [rbrace]
instance Pretty Alt where
pretty (Alt _ ctor rebinds body) =
hang
(text "|" <+> text ctor <+> listOf (map text rebinds) <+> text "->")
2 (pretty body)
instance Pretty IfAlt where
pretty (IfAlt _ cond body) =
hang
(text "|" <+> pretty cond <+> text "?")
2 (pretty body)
instance Pretty Delta where
pretty (Modify x v) = text x <+> text "<-" <+> pretty v
pretty (Set x v) = text x <+> text "=" <+> pretty v
pretty (Remove x) = text x <+> text "removed"
{-# language LambdaCase #-}
module ProgramTree where
import Text.PrettyPrint
data Module
= Module [Commented (Exported Decl)]
data Info
= Info { col, line :: Int, filename :: FilePath }
deriving (Show)
type Name
= String
data AST
= Var Info Name
| Constant Info Constant
| Lambda Info Binding
| Call Info AST [AST]
| LMatch Info [Alt]
| Match Info AST [Alt]
| Project Info AST Name
| Let Info [Commented (Exported Decl)] AST
| If Info [IfAlt]
| Ctor Info Name [AST]
| With Info AST [Commented Delta]
deriving (Show)
data Delta
= Modify Name AST
| Set Name AST
| Remove Name
deriving (Show)
data Exported a = Exported Bool a
deriving (Show)
data Commented a = Commented [String] a
deriving (Show)
data Decl
= Binding Binding
| Extend Extend
| Datatype Datatype
deriving (Show)
data Rebind = Rebind Info Name Name
deriving (Show)
data Datatype = Data Info Name [Name]
deriving (Show)
data Binding
= Bind Info Name [[Name]] AST [Commented Binding]
| Open Info AST [Name]
| Reexport Info AST [Name]
| Captured Info [Name]
deriving (Show)
data Extend
= Method Info Name [Name] (Maybe Name) [Commented Binding]
deriving (Show)
data Constant
= Number String
| String String
| Record [Commented Binding]
deriving (Show)
data Alt = Alt Info Name [Name] AST
deriving (Show)
data IfAlt = IfAlt Info AST AST
deriving (Show)
--- Position of parser.
---
; ctor Position(line, column, offset)
; extend Position(line, col, offset) as it
{
--- This method demonstrates record update syntax.
---
; step-over (char) = if
{
| char == '\n' ? it with { col = 1 ; line <- +(1) }
| else ? it with { col <- +(1) }
}
with { offset <- +(1) }
--- Here we can see the [ab]use of operators.
---
; focus-on-line (text) =
(
; lines = text split-by '\n'
; line = lines at! l
; pre = show(c) + ': '
; offset = c + len(pre)
; marker = ' ' x offset + '^'
; pre + line + '\n' + marker
)
; current-char (text) = text at offset
}
--- Parse results.
; ctor Ok (res, rest)
; ctor Error(err, rest)
--- Basic parsers.
; ok (res) = Parser <| \rest -> Ok (res, rest)
; error(err) = Parser <| \rest -> Error(err, rest)
--- Parser wrapper (run :: stream -> Ok OR Error)
; ctor Parser(run)
--- We will conjure some operators out of Parser namespace.
; run = .run
; then = .then
; catch = .catch
; || = .or
--- Ignore result of the parser on the right side
; <* = .and-
--- Ignore result of the parser on the left side
; *> = .-and
; <?> = .decorate
; extend Parser(run) as parser
{
--- Make a parser that runs current one and if it fails - runs another.
; or(other) = parser.dispatch
(
, Ok
, \_, rest -> other run rest
, Error
)
; then(callback) = parser.dispatch
(
, \res, rest -> callback(res) run rest
, Error
, Error
)
; -and(other) = parser then \_ -> other
; and-(other) =
parser then \x ->
other then \_ ->
ok(x)
; catch(callback) = parser.dispatch
(
, Ok
, \err, rest -> callback(err) run rest
, Error
)
--- Make a new parser that works like current, but don't move caret on failure.
; backtracks = Parser <| \stream ->
case run(stream) of
{
| Ok(res, rest) -> Ok(res, rest)
| Error(err, rest) -> Error(err, stream)
}
--- 0 of more repetitons. Works like * in regexps.
; many = (parser then \x ->
parser.some then \xs ->
Ok(x Push xs)
) || ok(Empty)
--- 1 of more repetitons. Works like + in regexps.
; some =
parser then \x ->
parser.many then \xs ->
ok(x Push xs)
; satisfying (is-good) =
(next then \x -> if
{
| is-good(x) ? ok(x)
| else ? error('<satisfying>')
}
).backtracks
; decorate(msg) = parser catch \e -> error(msg)
; dispatch(yes, no, failed) = Parser <| \stream ->
case run(stream) of
{
| Ok(res, rest) -> yes(res, rest)
| Error(err, rest) -> if
{
| rest == stream ? no(err, rest)
| else ? failed(err, rest)
}
}
}
; ctor CharStream(source, position)
; next = Parser .next
; extend CharStream(src, pos) as s
{
; next = case pos.current-char(text) of
{
| Just(char) -> Ok(char, s with { pos = pos.step-over(char) })
| Nothing -> Error('eof', s)
}
}
module Util where
(|>) :: a -> (a -> b) -> b
(|>) = flip ($)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment