Last active
December 8, 2016 18:11
-
-
Save michaelficarra/8ad559526c1602bd02dde021105a5da1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git c/purescript.cabal i/purescript.cabal | |
index 4f4fcabd..699e8440 100644 | |
--- c/purescript.cabal | |
+++ i/purescript.cabal | |
@@ -231,6 +231,7 @@ library | |
Language.PureScript.Sugar.TypeClasses | |
Language.PureScript.Sugar.TypeClasses.Deriving | |
Language.PureScript.Sugar.TypeDeclarations | |
+ Language.PureScript.Terms | |
Language.PureScript.Traversals | |
Language.PureScript.TypeChecker | |
Language.PureScript.TypeChecker.Entailment | |
diff --git c/src/Language/PureScript/AST/Literals.hs i/src/Language/PureScript/AST/Literals.hs | |
index 3a456237..d0a6de27 100644 | |
--- c/src/Language/PureScript/AST/Literals.hs | |
+++ i/src/Language/PureScript/AST/Literals.hs | |
@@ -5,6 +5,7 @@ module Language.PureScript.AST.Literals where | |
import Prelude.Compat | |
import Data.Text (Text) | |
+import Language.PureScript.Terms (PSString) | |
-- | | |
-- Data type for literal values. Parameterised so it can be used for Exprs and | |
@@ -18,7 +19,7 @@ data Literal a | |
-- | | |
-- A string literal | |
-- | |
- | StringLiteral Text | |
+ | StringLiteral PSString | |
-- | | |
-- A character literal | |
-- | |
diff --git c/src/Language/PureScript/CodeGen/JS/AST.hs i/src/Language/PureScript/CodeGen/JS/AST.hs | |
index 5f124dd2..3970e8f5 100644 | |
--- c/src/Language/PureScript/CodeGen/JS/AST.hs | |
+++ i/src/Language/PureScript/CodeGen/JS/AST.hs | |
@@ -11,6 +11,7 @@ import Data.Text (Text) | |
import Language.PureScript.AST (SourceSpan(..)) | |
import Language.PureScript.Comments | |
+import Language.PureScript.Terms (PSString) | |
import Language.PureScript.Traversals | |
-- | | |
@@ -132,7 +133,7 @@ data JS | |
-- | | |
-- A string literal | |
-- | |
- | JSStringLiteral (Maybe SourceSpan) Text | |
+ | JSStringLiteral (Maybe SourceSpan) PSString | |
-- | | |
-- A boolean literal | |
-- | |
@@ -160,7 +161,7 @@ data JS | |
-- | | |
-- An object property accessor expression | |
-- | |
- | JSAccessor (Maybe SourceSpan) Text JS | |
+ | JSAccessor (Maybe SourceSpan) PSString JS | |
-- | | |
-- A function introduction (optional name, arguments, body) | |
-- | |
diff --git c/src/Language/PureScript/CodeGen/JS/Optimizer/Common.hs i/src/Language/PureScript/CodeGen/JS/Optimizer/Common.hs | |
index 01a41cae..c3405542 100644 | |
--- c/src/Language/PureScript/CodeGen/JS/Optimizer/Common.hs | |
+++ i/src/Language/PureScript/CodeGen/JS/Optimizer/Common.hs | |
@@ -11,6 +11,7 @@ import Data.Maybe (fromMaybe) | |
import Language.PureScript.Crash | |
import Language.PureScript.CodeGen.JS.AST | |
+import Language.PureScript.Terms (mkString) | |
applyAll :: [a -> a] -> a -> a | |
applyAll = foldl' (.) id | |
@@ -71,13 +72,13 @@ removeFromBlock _ js = js | |
isFn :: (Text, Text) -> JS -> Bool | |
isFn (moduleName, fnName) (JSAccessor _ x (JSVar _ y)) = | |
- x == fnName && y == moduleName | |
+ x == mkString fnName && y == moduleName | |
isFn (moduleName, fnName) (JSIndexer _ (JSStringLiteral _ x) (JSVar _ y)) = | |
- x == fnName && y == moduleName | |
+ x == mkString fnName && y == moduleName | |
isFn _ _ = False | |
isDict :: (Text, Text) -> JS -> Bool | |
-isDict (moduleName, dictName) (JSAccessor _ x (JSVar _ y)) = x == dictName && y == moduleName | |
+isDict (moduleName, dictName) (JSAccessor _ x (JSVar _ y)) = x == mkString dictName && y == moduleName | |
isDict _ _ = False | |
isDict' :: [(Text, Text)] -> JS -> Bool | |
diff --git c/src/Language/PureScript/CodeGen/JS/Optimizer/Inliner.hs i/src/Language/PureScript/CodeGen/JS/Optimizer/Inliner.hs | |
index fdc482a3..7f144b75 100644 | |
--- c/src/Language/PureScript/CodeGen/JS/Optimizer/Inliner.hs | |
+++ i/src/Language/PureScript/CodeGen/JS/Optimizer/Inliner.hs | |
@@ -23,6 +23,7 @@ import qualified Data.Text as T | |
import Language.PureScript.CodeGen.JS.AST | |
import Language.PureScript.CodeGen.JS.Optimizer.Common | |
import qualified Language.PureScript.Constants as C | |
+import Language.PureScript.Terms (mkString) | |
-- TODO: Potential bug: | |
-- Shouldn't just inline this case: { var x = 0; x.toFixed(10); } | |
@@ -213,7 +214,7 @@ inlineCommonOperators = applyAll $ | |
isNFn :: Text -> Int -> JS -> Bool | |
isNFn prefix n (JSVar _ name) = name == (prefix <> T.pack (show n)) | |
- isNFn prefix n (JSAccessor _ name (JSVar _ dataFunctionUncurried)) | dataFunctionUncurried == C.dataFunctionUncurried = name == (prefix <> T.pack (show n)) | |
+ isNFn prefix n (JSAccessor _ name (JSVar _ dataFunctionUncurried)) | dataFunctionUncurried == C.dataFunctionUncurried = name == mkString (prefix <> T.pack (show n)) | |
isNFn _ _ _ = False | |
runFn :: Int -> JS -> JS | |
@@ -235,11 +236,11 @@ inlineCommonOperators = applyAll $ | |
convert other = other | |
isModFn :: (Text, Text) -> JS -> Bool | |
- isModFn (m, op) (JSAccessor _ op' (JSVar _ m')) = m == m' && op == op' | |
+ isModFn (m, op) (JSAccessor _ op' (JSVar _ m')) = m == m' && mkString op == op' | |
isModFn _ _ = False | |
isModFnWithDict :: (Text, Text) -> JS -> Bool | |
- isModFnWithDict (m, op) (JSApp _ (JSAccessor _ op' (JSVar _ m')) [(JSVar _ _)]) = m == m' && op == op' | |
+ isModFnWithDict (m, op) (JSApp _ (JSAccessor _ op' (JSVar _ m')) [JSVar _ _]) = m == m' && mkString op == op' | |
isModFnWithDict _ _ = False | |
-- (f <<< g $ x) = f (g x) | |
diff --git c/src/Language/PureScript/CodeGen/JS/Optimizer/MagicDo.hs i/src/Language/PureScript/CodeGen/JS/Optimizer/MagicDo.hs | |
index 8fb82abb..96f2a215 100644 | |
--- c/src/Language/PureScript/CodeGen/JS/Optimizer/MagicDo.hs | |
+++ i/src/Language/PureScript/CodeGen/JS/Optimizer/MagicDo.hs | |
@@ -12,6 +12,7 @@ import Data.Maybe (fromJust, isJust) | |
import Language.PureScript.CodeGen.JS.AST | |
import Language.PureScript.CodeGen.JS.Optimizer.Common | |
import Language.PureScript.Options | |
+import Language.PureScript.Terms (mkString) | |
import qualified Language.PureScript.Constants as C | |
magicDo :: Options -> JS -> JS | |
@@ -67,7 +68,7 @@ magicDo' = everywhereOnJS undo . everywhereOnJSTopDown convert | |
-- Check if an expression represents the polymorphic pure or return function | |
isPurePoly = isFn (C.controlApplicative, C.pure') | |
-- Check if an expression represents a function in the Eff module | |
- isEffFunc name (JSAccessor _ name' (JSVar _ eff)) = eff == C.eff && name == name' | |
+ isEffFunc name (JSAccessor _ name' (JSVar _ eff)) = eff == C.eff && mkString name == name' | |
isEffFunc _ _ = False | |
-- Remove __do function applications which remain after desugaring | |
@@ -106,14 +107,14 @@ inlineST = everywhereOnJS convertBlock | |
convert agg (JSApp s1 f [arg]) | isSTFunc C.newSTRef f = | |
JSFunction s1 Nothing [] (JSBlock s1 [JSReturn s1 $ if agg then arg else JSObjectLiteral s1 [(C.stRefValue, arg)]]) | |
convert agg (JSApp _ (JSApp s1 f [ref]) []) | isSTFunc C.readSTRef f = | |
- if agg then ref else JSAccessor s1 C.stRefValue ref | |
+ if agg then ref else JSAccessor s1 (mkString C.stRefValue) ref | |
convert agg (JSApp _ (JSApp _ (JSApp s1 f [ref]) [arg]) []) | isSTFunc C.writeSTRef f = | |
- if agg then JSAssignment s1 ref arg else JSAssignment s1 (JSAccessor s1 C.stRefValue ref) arg | |
+ if agg then JSAssignment s1 ref arg else JSAssignment s1 (JSAccessor s1 (mkString C.stRefValue) ref) arg | |
convert agg (JSApp _ (JSApp _ (JSApp s1 f [ref]) [func]) []) | isSTFunc C.modifySTRef f = | |
- if agg then JSAssignment s1 ref (JSApp s1 func [ref]) else JSAssignment s1 (JSAccessor s1 C.stRefValue ref) (JSApp s1 func [JSAccessor s1 C.stRefValue ref]) | |
+ if agg then JSAssignment s1 ref (JSApp s1 func [ref]) else JSAssignment s1 (JSAccessor s1 (mkString C.stRefValue) ref) (JSApp s1 func [JSAccessor s1 (mkString C.stRefValue) ref]) | |
convert _ other = other | |
-- Check if an expression represents a function in the ST module | |
- isSTFunc name (JSAccessor _ name' (JSVar _ st)) = st == C.st && name == name' | |
+ isSTFunc name (JSAccessor _ name' (JSVar _ st)) = st == C.st && mkString name == name' | |
isSTFunc _ _ = False | |
-- Find all ST Refs initialized in this block | |
findSTRefsIn = everythingOnJS (++) isSTRef | |
diff --git c/src/Language/PureScript/Parser/Lexer.hs i/src/Language/PureScript/Parser/Lexer.hs | |
index cbe90f5f..0bea4073 100644 | |
--- c/src/Language/PureScript/Parser/Lexer.hs | |
+++ i/src/Language/PureScript/Parser/Lexer.hs | |
@@ -248,18 +248,6 @@ parseToken = P.choice | |
symbolChar :: Lexer u Char | |
symbolChar = P.satisfy isSymbolChar | |
- surrogates :: Char -> (Char, Char) | |
- surrogates c = (high, low) | |
- where | |
- (h, l) = divMod (fromEnum c - 0x10000) 0x400 | |
- high = toEnum (h + 0xD800) | |
- low = toEnum (l + 0xDC00) | |
- | |
- expandAstralCodePointToUTF16Surrogates :: Char -> [Char] | |
- expandAstralCodePointToUTF16Surrogates c | fromEnum c > 0xFFFF = [high, low] | |
- where (high, low) = surrogates c | |
- expandAstralCodePointToUTF16Surrogates c = [c] | |
- | |
parseCharLiteral :: Lexer u Char | |
parseCharLiteral = P.try $ do { | |
c <- PT.charLiteral tokenParser; | |
@@ -269,10 +257,10 @@ parseToken = P.choice | |
} | |
parseStringLiteral :: Lexer u Text | |
- parseStringLiteral = blockString <|> T.pack <$> concatMap expandAstralCodePointToUTF16Surrogates <$> PT.stringLiteral tokenParser | |
+ parseStringLiteral = T.pack <$> (blockString <|> PT.stringLiteral tokenParser) | |
where | |
delimiter = P.try (P.string "\"\"\"") | |
- blockString = delimiter *> (T.pack <$> P.manyTill P.anyChar delimiter) | |
+ blockString = delimiter *> P.manyTill P.anyChar delimiter | |
parseNumber :: Lexer u (Either Integer Double) | |
parseNumber = (consumeLeadingZero *> P.parserZero) <|> | |
diff --git c/src/Language/PureScript/Pretty/JS.hs i/src/Language/PureScript/Pretty/JS.hs | |
index d142873f..7fdc5463 100644 | |
--- c/src/Language/PureScript/Pretty/JS.hs | |
+++ i/src/Language/PureScript/Pretty/JS.hs | |
@@ -25,8 +25,7 @@ import Language.PureScript.CodeGen.JS.Common | |
import Language.PureScript.Comments | |
import Language.PureScript.Crash | |
import Language.PureScript.Pretty.Common | |
- | |
-import Numeric | |
+import Language.PureScript.Terms | |
-- TODO (Christoph): Get rid of T.unpack / pack | |
@@ -59,7 +58,7 @@ literals = mkPattern' match' | |
] | |
where | |
objectPropertyToString :: (Emit gen) => Text -> gen | |
- objectPropertyToString s | identNeedsEscaping s = string s | |
+ objectPropertyToString s | identNeedsEscaping s = string (mkString s) | |
| otherwise = emit s | |
match (JSBlock _ sts) = mconcat <$> sequence | |
[ return $ emit "{\n" | |
@@ -150,28 +149,8 @@ literals = mkPattern' match' | |
match (JSRaw _ js) = return $ emit js | |
match _ = mzero | |
-string :: (Emit gen) => Text -> gen | |
-string s = emit $ "\"" <> T.concatMap encodeChar s <> "\"" | |
- where | |
- encodeChar :: Char -> Text | |
- encodeChar '\b' = "\\b" | |
- encodeChar '\t' = "\\t" | |
- encodeChar '\n' = "\\n" | |
- encodeChar '\v' = "\\v" | |
- encodeChar '\f' = "\\f" | |
- encodeChar '\r' = "\\r" | |
- encodeChar '"' = "\\\"" | |
- encodeChar '\\' = "\\\\" | |
- -- PureScript strings are sequences of UTF-16 code units, so this case should never be hit. | |
- -- If it is somehow hit, though, output the designated Unicode replacement character U+FFFD. | |
- encodeChar c | fromEnum c > 0xFFFF = "\\uFFFD" | |
- encodeChar c | fromEnum c > 0xFFF = "\\u" <> showHex' (fromEnum c) "" | |
- encodeChar c | fromEnum c > 0xFF = "\\u0" <> showHex' (fromEnum c) "" | |
- encodeChar c | fromEnum c < 0x10 = "\\x0" <> showHex' (fromEnum c) "" | |
- encodeChar c | fromEnum c > 0x7E || fromEnum c < 0x20 = "\\x" <> showHex' (fromEnum c) "" | |
- encodeChar c = T.singleton c | |
- | |
- showHex' a b = T.pack (showHex a b) | |
+string :: (Emit gen) => PSString -> gen | |
+string = emit . renderJSON | |
conditional :: Pattern PrinterState JS ((Maybe SourceSpan, JS, JS), JS) | |
conditional = mkPattern match | |
diff --git c/src/Language/PureScript/Terms.hs i/src/Language/PureScript/Terms.hs | |
new file mode 100644 | |
index 00000000..a29d7342 | |
--- /dev/null | |
+++ i/src/Language/PureScript/Terms.hs | |
@@ -0,0 +1,77 @@ | |
+{-# LANGUAGE GeneralizedNewtypeDeriving #-} | |
+ | |
+-- | | |
+-- Data types for PureScript terms | |
+-- | |
+module Language.PureScript.Terms (PSString, mkString, renderJSON, toUTF16CodeUnits) where | |
+ | |
+import qualified Data.Aeson as A | |
+import Prelude.Compat | |
+import Data.Monoid ((<>)) | |
+import Data.Word | |
+import Data.Text (Text) | |
+import qualified Data.Text as T | |
+import Data.Vector.Unboxed (Vector) | |
+import qualified Data.Vector.Unboxed as V | |
+ | |
+import Numeric (showHex) | |
+ | |
+-- | | |
+-- Strings in PureScript are sequences of UTF-16 code units, which do not | |
+-- necessarily represent UTF-16 encoded text. For example, it is permissible | |
+-- for a string to contain *lone surrogates,* i.e. characters in the range | |
+-- 0xD800 - 0xDFFF which do not appear as a part of a surrogate pair. | |
+newtype PSString = PSString (Vector Word16) | |
+ deriving (Eq, Ord) | |
+ | |
+instance Show PSString where | |
+ show = T.unpack . renderJSON | |
+ | |
+instance A.ToJSON PSString where | |
+ toJSON = A.toJSON . toString | |
+ where | |
+ toString :: PSString -> String | |
+ toString (PSString s) = toChar <$> V.toList s | |
+ | |
+renderJSON :: PSString -> Text | |
+renderJSON s = T.pack $ "\"" <> concatMap encodeChar (toUTF16CodeUnits s) <> "\"" | |
+ where | |
+ encodeChar :: Word16 -> String | |
+ encodeChar c | c > 0xFFF = "\\u" <> showHex c "" | |
+ encodeChar c | c > 0xFF = "\\u0" <> showHex c "" | |
+ encodeChar c | c < 0x10 = "\\x0" <> showHex c "" | |
+ encodeChar c | c > 0x7E || c < 0x20 = "\\x" <> showHex c "" | |
+ encodeChar c | toChar c == '\b' = "\\b" | |
+ encodeChar c | toChar c == '\t' = "\\t" | |
+ encodeChar c | toChar c == '\n' = "\\n" | |
+ encodeChar c | toChar c == '\v' = "\\v" | |
+ encodeChar c | toChar c == '\f' = "\\f" | |
+ encodeChar c | toChar c == '\r' = "\\r" | |
+ encodeChar c | toChar c == '"' = "\\\"" | |
+ encodeChar c | toChar c == '\\' = "\\\\" | |
+ encodeChar c = pure $ toChar c | |
+ | |
+instance A.FromJSON PSString where | |
+ parseJSON o = mkString <$> A.parseJSON o | |
+ | |
+toChar :: Word16 -> Char | |
+toChar = toEnum . fromIntegral | |
+ | |
+toWord :: Int -> Word16 | |
+toWord = fromIntegral | |
+ | |
+surrogates :: Char -> (Word16, Word16) | |
+surrogates c = (toWord (h + 0xD800), toWord (l + 0xDC00)) | |
+ where | |
+ (h, l) = divMod (fromEnum c - 0x10000) 0x400 | |
+ | |
+encodeUTF16 :: Char -> [Word16] | |
+encodeUTF16 c | fromEnum c > 0xFFFF = [high, low] | |
+ where (high, low) = surrogates c | |
+encodeUTF16 c = [toWord $ fromEnum c] | |
+ | |
+mkString :: Text -> PSString | |
+mkString = PSString . V.fromList . concatMap encodeUTF16 . T.unpack | |
+ | |
+toUTF16CodeUnits :: PSString -> [Word16] | |
+toUTF16CodeUnits (PSString str) = V.toList str |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment