Created
May 22, 2013 07:30
-
-
Save kputnam/5625856 to your computer and use it in GitHub Desktop.
AST, parser, and pretty printer for JS-like language
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
module Example | |
( Term(..) | |
, pretty | |
, parse | |
, testReparse | |
, normalize | |
) where | |
import Prelude hiding (takeWhile) | |
import Data.Text (Text, pack) | |
import Data.Monoid ((<>)) | |
-- Used for parsing | |
import Data.Attoparsec.Expr | |
import Control.Applicative ((<$>), (<|>), (*>), (<*), (<*>)) | |
import Data.Attoparsec.Text (char, decimal, inClass, string, takeWhile, | |
takeWhile1, parseOnly, skipSpace) | |
-- Used for testing | |
import Test.QuickCheck | |
data Term | |
= Var Text | |
| Num Int | |
| Str Text -- "..." | |
| Grp Term -- (a) | |
| Dot Term Term -- a.b | |
| Get Term Term -- a[b] | |
| New Term -- new a | |
| App Term Term -- f(x) | |
| Inc Term -- a ++ | |
| Dec Term -- a -- | |
| Not Term -- !a | |
| Pos Term -- +a | |
| Neg Term -- -a | |
| Exp Term Term -- a ** b | |
| Mul Term Term -- a * b | |
| Div Term Term -- a / b | |
| Mod Term Term -- a % b | |
| Add Term Term -- a + b | |
| Sub Term Term -- a - b | |
| Shl Term Term -- a << b | |
| Shr Term Term -- a >> b | |
| Gt Term Term -- a > b | |
| Lt Term Term -- a < b | |
| Gte Term Term -- a >= b | |
| Lte Term Term -- a <= b | |
| Ne Term Term -- a != b | |
| Eq Term Term -- a == b | |
| Bnd Term Term -- a & b | |
| Bxo Term Term -- a ^ b | |
| Bor Term Term -- a | b | |
| Lnd Term Term -- a && b | |
| Lor Term Term -- a || b | |
| Lxo Term Term -- a ^^ b | |
| Set Term Term -- a := b | |
deriving (Show, Eq) | |
-- Printing | |
-------------------------------------------------------------------------------- | |
pretty :: Term -> Text | |
pretty = walk 100 | |
where | |
walk :: Int -> Term -> Text | |
walk _ (Var x) = x | |
walk _ (Num n) = pack (show n) | |
walk _ (Str s) = pack (show s) | |
walk p (Grp e) = paren p e | |
walk p e@(Dot a b) = lefty p e a "." b | |
walk p e@(Get a b) | |
| p < q = paren q e | |
| otherwise = walk q a <> bracket 100 b | |
where q = prec e | |
walk p e@(New a) = prefix p e "new " a | |
walk p e@(App a b) = lefty p e a "@" b | |
walk p e@(Inc a) = postfix p e a " ++" | |
walk p e@(Dec a) = postfix p e a " --" | |
walk p e@(Not a) = prefix p e "!" a | |
walk p e@(Pos a) = prefix p e "+" a | |
walk p e@(Neg a) = prefix p e "-" a | |
walk p e@(Exp a b) = righty p e a " ** " b | |
walk p e@(Mul a b) = lefty p e a " * " b | |
walk p e@(Div a b) = lefty p e a " / " b | |
walk p e@(Mod a b) = lefty p e a " % " b | |
walk p (Add a b@(Pos _)) | |
= walk p (Add a (Grp b)) | |
walk p e@(Add a b) = lefty p e a " + " b | |
walk p (Sub a b@(Neg _)) | |
= walk p (Sub a (Grp b)) | |
walk p e@(Sub a b) = lefty p e a " - " b | |
walk p e@(Shl a b) = lefty p e a " << " b | |
walk p e@(Shr a b) = lefty p e a " >> " b | |
walk p e@(Ne a b) = lefty p e a " != " b | |
walk p e@(Eq a b) = lefty p e a " == " b | |
walk p e@(Gt a b) = lefty p e a " > " b | |
walk p e@(Lt a b) = lefty p e a " < " b | |
walk p e@(Gte a b) = lefty p e a " >= " b | |
walk p e@(Lte a b) = lefty p e a " <= " b | |
walk p e@(Bnd a b) = lefty p e a " & " b | |
walk p e@(Bxo a b) = lefty p e a " ^ " b | |
walk p e@(Bor a b) = lefty p e a " | " b | |
walk p e@(Lnd a b) = lefty p e a " && " b | |
walk p e@(Lxo a b) = lefty p e a " ^^ " b | |
walk p e@(Lor a b) = lefty p e a " || " b | |
walk p e@(Set a b) = righty p e a " := " b | |
paren :: Int -> Term -> Text | |
paren p e = "(" <> walk p e <> ")" | |
bracket :: Int -> Term -> Text | |
bracket p e = "[" <> walk p e <> "]" | |
prefix :: Int -> Term -> Text -> Term -> Text | |
prefix p e op a | |
| p < q = paren q e | |
| otherwise = op <> special q a | |
where q = prec e | |
postfix :: Int -> Term -> Term -> Text -> Text | |
postfix p e a op | |
| p < q = paren q e | |
| otherwise = special q a <> op | |
where q = prec e | |
lefty :: Int -> Term -> Term -> Text -> Term -> Text | |
lefty p e a op b | |
| p < q = paren q e | |
| otherwise = walk q a <> op <> special q b | |
where q = prec e | |
righty :: Int -> Term -> Term -> Text -> Term -> Text | |
righty p e a op b | |
| p < q = paren q e | |
| otherwise = special q a <> op <> walk q b | |
where q = prec e | |
special :: Int -> Term -> Text | |
special p e | |
| p == prec e = paren p e | |
| otherwise = walk p e | |
prec :: Term -> Int | |
prec (Var _) = 0 | |
prec (Num _) = 0 | |
prec (Str _) = 0 | |
prec (Grp _) = 0 | |
prec (Dot _ _) = 1 | |
prec (Get _ _) = 1 | |
prec (New _) = 2 | |
prec (App _ _) = 3 | |
prec (Inc _) = 4 | |
prec (Dec _) = 4 | |
prec (Exp _ _) = 5 | |
prec (Not _) = 6 | |
prec (Pos _) = 6 | |
prec (Neg _) = 6 | |
prec (Mul _ _) = 7 | |
prec (Div _ _) = 7 | |
prec (Mod _ _) = 7 | |
prec (Add _ _) = 8 | |
prec (Sub _ _) = 8 | |
prec (Shl _ _) = 9 | |
prec (Shr _ _) = 9 | |
prec (Gt _ _) = 10 | |
prec (Lt _ _) = 10 | |
prec (Gte _ _) = 10 | |
prec (Lte _ _) = 10 | |
prec (Ne _ _) = 11 | |
prec (Eq _ _) = 11 | |
prec (Bnd _ _) = 12 | |
prec (Bxo _ _) = 13 | |
prec (Bor _ _) = 14 | |
prec (Lnd _ _) = 15 | |
prec (Lxo _ _) = 16 | |
prec (Lor _ _) = 17 | |
prec (Set _ _) = 18 | |
-- Parsing | |
-------------------------------------------------------------------------------- | |
parse :: Text -> Term | |
parse t = either error id (parseOnly expr t) | |
where | |
expr = buildExpressionParser table term | |
term = next =<< trim (paren expr <|> num <|> var <|> str) | |
next e = (next =<< Get e <$> bracket expr) <|> return e | |
num = Num <$> decimal | |
var = Var <$> takeWhile1 (inClass "a-zA-Z") | |
str = Str <$> (char '"' *> takeWhile (inClass "a-zA-Z0-9 ") <* char '"') | |
table = [[ Infix (op "." Dot) AssocLeft ] | |
,[ Prefix (op "new " New) ] | |
,[ Infix (op "@" App) AssocLeft ] | |
,[ Postfix (op "++" Inc) | |
, Postfix (op "--" Dec) ] | |
,[ Infix (op "**" Exp) AssocRight ] | |
,[ Prefix (op "!" Not) | |
, Prefix (op "+" Pos) | |
, Prefix (op "-" Neg) ] | |
,[ Infix (op "*" Mul) AssocLeft | |
, Infix (op "/" Div) AssocLeft | |
, Infix (op "%" Mod) AssocLeft ] | |
,[ Infix (op "+" Add) AssocLeft | |
, Infix (op "-" Sub) AssocLeft ] | |
,[ Infix (op "<<" Shl) AssocLeft | |
, Infix (op ">>" Shr) AssocLeft ] | |
,[ Infix (op ">=" Gte) AssocLeft | |
, Infix (op "<=" Lte) AssocLeft | |
, Infix (op ">" Gt) AssocLeft | |
, Infix (op "<" Lt) AssocLeft ] | |
,[ Infix (op "!=" Ne) AssocLeft | |
, Infix (op "==" Eq) AssocLeft ] | |
,[ Infix (op "&" Bnd) AssocLeft ] | |
,[ Infix (op "^" Bxo) AssocLeft ] | |
,[ Infix (op "|" Bor) AssocLeft ] | |
,[ Infix (op "&&" Lnd) AssocLeft ] | |
,[ Infix (op "^^" Lxo) AssocLeft ] | |
,[ Infix (op "||" Lor) AssocLeft ] | |
,[ Infix (op ":=" Set) AssocRight ]] | |
op s ctor = trim (string s) *> return ctor | |
trim p = skipSpace *> p <* skipSpace | |
paren p = char '(' *> p <* char ')' | |
bracket p = char '[' *> p <* char ']' | |
-- Testing | |
-------------------------------------------------------------------------------- | |
normalize :: Term -> Term | |
normalize = walk | |
where | |
walk (Var x) = Var x | |
walk (Num n) = Num n | |
walk (Str s) = Str s | |
walk (Grp e) = e | |
walk (Dot a b) = Dot (walk a) (walk b) | |
walk (Get a b) = Get (walk a) (walk b) | |
walk (New a) = New (walk a) | |
walk (App a b) = App (walk a) (walk b) | |
walk (Inc a) = Inc (walk a) | |
walk (Dec a) = Dec (walk a) | |
walk (Not a) = Not (walk a) | |
walk (Pos a) = Pos (walk a) | |
walk (Neg a) = Neg (walk a) | |
walk (Exp a b) = Exp (walk a) (walk b) | |
walk (Mul a (Mul b c)) | |
= Mul (Mul (walk a) (walk b)) (walk c) | |
walk (Mul a b) = Mul (walk a) (walk b) | |
walk (Div a b) = Div (walk a) (walk b) | |
walk (Mod a b) = Mod (walk a) (walk b) | |
walk (Add a (Add b c)) | |
= Add (Add (walk a) (walk b)) (walk c) | |
walk (Add a b) = Add (walk a) (walk b) | |
walk (Sub a b) = Sub (walk a) (walk b) | |
walk (Shl a b) = Shl (walk a) (walk b) | |
walk (Shr a b) = Shr (walk a) (walk b) | |
walk (Ne a b) = Ne (walk a) (walk b) | |
walk (Eq a b) = Eq (walk a) (walk b) | |
walk (Gt a b) = Gt (walk a) (walk b) | |
walk (Lt a b) = Lt (walk a) (walk b) | |
walk (Gte a b) = Gte (walk a) (walk b) | |
walk (Lte a b) = Lte (walk a) (walk b) | |
walk (Bnd a (Bnd b c)) | |
= Add (Add (walk a) (walk b)) (walk c) | |
walk (Bnd a b) = Bnd (walk a) (walk b) | |
walk (Bxo a (Bxo b c)) | |
= Bxo (Bxo (walk a) (walk b)) (walk c) | |
walk (Bxo a b) = Bxo (walk a) (walk b) | |
walk (Bor a (Bor b c)) | |
= Bor (Bor (walk a) (walk b)) (walk c) | |
walk (Bor a b) = Bor (walk a) (walk b) | |
walk (Lnd a (Lnd b c)) | |
= Lnd (Lnd (walk a) (walk b)) (walk c) | |
walk (Lnd a b) = Lnd (walk a) (walk b) | |
walk (Lxo a (Lxo b c)) | |
= Lxo (Lxo (walk a) (walk b)) (walk c) | |
walk (Lxo a b) = Lxo (walk a) (walk b) | |
walk (Lor a (Lor b c)) | |
= Lor (Lor (walk a) (walk b)) (walk c) | |
walk (Lor a b) = Lor (walk a) (walk b) | |
walk (Set a b) = Set (walk a) (walk b) | |
newtype ChrName = ChrName { getChrName :: Char } | |
newtype VarName = VarName { getVarName :: Text } | |
newtype StrText = StrText { getStrText :: Text } | |
instance Arbitrary ChrName where | |
arbitrary = ChrName <$> oneof [choose ('A', 'Z'), choose ('a', 'z')] | |
instance Arbitrary VarName where | |
arbitrary = VarName <$> pack <$> map getChrName <$> arbitrary `suchThat` (not . null) | |
instance Arbitrary StrText where | |
arbitrary = StrText <$> pack <$> map getChrName <$> arbitrary | |
instance Arbitrary Term where | |
arbitrary = frequency [(15, Var <$> getVarName <$> arbitrary) | |
,(15, Num <$> arbitrary `suchThat` (>= 0)) | |
,( 2, Str <$> getStrText <$> arbitrary) | |
,( 1, New <$> arbitrary) | |
,( 4, Get <$> arbitrary <*> arbitrary) | |
,( 3, App <$> arbitrary <*> arbitrary) | |
,( 3, Inc <$> arbitrary) | |
,( 3, Dec <$> arbitrary) | |
,( 3, Not <$> arbitrary) | |
,( 3, Pos <$> arbitrary) | |
,( 3, Neg <$> arbitrary) | |
,( 1, Exp <$> arbitrary <*> arbitrary) | |
,( 1, Mul <$> arbitrary <*> arbitrary) | |
,( 1, Div <$> arbitrary <*> arbitrary) | |
,( 1, Mod <$> arbitrary <*> arbitrary) | |
,( 1, Add <$> arbitrary <*> arbitrary) | |
,( 1, Sub <$> arbitrary <*> arbitrary) | |
,( 1, Shl <$> arbitrary <*> arbitrary) | |
,( 1, Shr <$> arbitrary <*> arbitrary) | |
,( 1, Ne <$> arbitrary <*> arbitrary) | |
,( 1, Eq <$> arbitrary <*> arbitrary) | |
,( 1, Gt <$> arbitrary <*> arbitrary) | |
,( 1, Lt <$> arbitrary <*> arbitrary) | |
,( 1, Gte <$> arbitrary <*> arbitrary) | |
,( 1, Lte <$> arbitrary <*> arbitrary) | |
,( 1, Bnd <$> arbitrary <*> arbitrary) | |
,( 1, Bxo <$> arbitrary <*> arbitrary) | |
,( 1, Bor <$> arbitrary <*> arbitrary) | |
,( 1, Lnd <$> arbitrary <*> arbitrary) | |
,( 1, Lxo <$> arbitrary <*> arbitrary) | |
,( 1, Lor <$> arbitrary <*> arbitrary) | |
,( 1, Set <$> arbitrary <*> arbitrary)] | |
-- parse . pretty == id | |
testReparse :: Term -> Bool | |
testReparse ast = ast == parse (pretty ast) | |
-- Run the test: quickCheck testReparse |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment