-
-
Save heath/b6045f33266b27335bc78e9fbe7d01f9 to your computer and use it in GitHub Desktop.
Parsing, Generating, and Diffing JSON in PureScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
-- | JSON is an incredibly simple format. Even its lists are untyped. | |
-- | As with all languages, functional programming encourages us to | |
-- | make a domain-specific language (or DSL) to capture the "ideas" | |
-- | of the language, which we can then use to talk about its content. | |
-- | In this little snippet, we'll build a JSON DSL, transform it into | |
-- | a recursive structure, and then use that result to generate some | |
-- | JSON output, parse some JSON input, and even diff two trees! | |
import Prelude | |
import Control.Alt ((<|>)) | |
import Control.Monad.Eff (Eff) | |
import Control.Monad.Eff.Console (CONSOLE, log) | |
import Control.Monad.Eff.Exception (EXCEPTION) | |
import Control.Monad.Eff.Random (RANDOM) | |
import Data.Array (fromFoldable, head, nub) | |
import Data.Either (Either, either) | |
import Data.Eq (class Eq1) | |
import Data.Foldable (class Foldable, any, find, foldMap, foldlDefault, foldrDefault) | |
import Data.Functor.Compose (Compose(..)) | |
import Data.Functor.Mu (Mu(..), unroll) | |
import Data.Lazy (Lazy, defer, force) | |
import Data.Maybe (Maybe, fromJust) | |
import Data.Monoid (mempty) | |
import Data.Newtype (class Newtype, unwrap) | |
import Data.NonEmpty ((:|)) | |
import Data.StrMap as SM | |
import Data.String (joinWith) | |
import Data.Traversable (class Traversable, sequence, sequenceDefault, traverse) | |
import Data.Tuple (Tuple(..)) | |
import Partial.Unsafe (unsafePartial) | |
import Test.QuickCheck (quickCheck, withHelp, (===)) | |
import Test.QuickCheck.Arbitrary (class Arbitrary, arbitrary) | |
import Test.QuickCheck.Gen (Gen, elements) | |
import Text.Parsing.Simple (Parser, parse, sepBy) | |
import Text.Parsing.Simple as S | |
-- | Here, if I'm correct, is the entire JSON language spec. Forget | |
-- | the syntax for a minute: this data type captures every idea we | |
-- | could express with JSON. Notice our `f` parameter here just sits | |
-- | where we would expect the recursion to happen. | |
data JsonF f | |
= JNull | |
| JBool Boolean | |
| JInt Int | |
| JNum Number | |
| JStr String | |
| JList (Array f) | |
| JObject (SM.StrMap f) | |
derive instance eqJsonF :: Eq f => Eq (JsonF f) | |
instance showJsonF :: Show f => Show (JsonF f) where | |
show JNull = "null" | |
show (JBool b) = show b | |
show (JInt i) = show i | |
show (JNum n) = show n | |
show (JStr s) = show s | |
show (JList l) = show l | |
show (JObject o) = show o | |
-- | In fact, because of that `f`, we can get a `Functor` instance for | |
-- | no extra cost, which is neat. If this functor seems useless (and, | |
-- | on its own, it really is), don't worry: all will become clear. | |
derive instance functorJsonF :: Functor JsonF | |
-- | We're not going to spend too much time on this typeclass. Just so | |
-- | we can move on: `Eq1` lets us check that two `Json` objects are | |
-- | equal. | |
instance eq1JsonF :: Eq1 JsonF where | |
eq1 = eq | |
-- | There isn't really any information worth collecting in our JsonF | |
-- | type in practice, but having a `Foldable` instance lets us build | |
-- | a `Traversable` instance. | |
instance foldableJsonF :: Foldable JsonF where | |
foldMap f (JList xs) = foldMap f xs | |
foldMap f (JObject xs) = foldMap f xs | |
foldMap f _ = mempty | |
foldr f = foldrDefault f | |
foldl f = foldlDefault f | |
-- | Not that it's required for parsing/generating, but our JsonF is | |
-- | a pretty straightforward `Traversable` type. This will be useful | |
-- | when we get to the diffing stuff later on. | |
instance traversableJsonF :: Traversable JsonF where | |
sequence xs = sequenceDefault xs | |
traverse :: forall f a b. | |
Applicative f | |
=> (a -> f b) | |
-> JsonF a | |
-> f (JsonF b) | |
traverse f (JList xs) = JList <$> traverse f xs | |
traverse f (JObject xs) = JObject <$> traverse f xs | |
traverse _ JNull = pure JNull | |
traverse _ (JBool b) = pure (JBool b) | |
traverse _ (JInt i) = pure (JInt i) | |
traverse _ (JNum n) = pure (JNum n) | |
traverse _ (JStr s) = pure (JStr s) | |
-- | `Mu` gives us the "fixed point" of `JsonF`. WHat this means in | |
-- | English is that it takes `JsonF f` and sets `f` to `JsonF f`. At | |
-- | first glance, that gives us `JsonF (JsonF (JsonF ...` forever, | |
-- | but `Mu` takes care of the type and stops this nonsense. | |
type Json = Mu JsonF | |
-- | With our `Mu` representation, we have to "lift" our `JsonF` | |
-- | constructors for our `Json` type: | |
fromBool :: Boolean -> Json | |
fromBool = In <<< JBool | |
fromInt :: Int -> Json | |
fromInt = In <<< JInt | |
fromNull :: Json | |
fromNull = In JNull | |
fromNum :: Number -> Json | |
fromNum = In <<< JNum | |
fromStr :: String -> Json | |
fromStr = In <<< JStr | |
fromList :: Array Json -> Json | |
fromList = In <<< JList | |
fromObj :: SM.StrMap Json -> Json | |
fromObj = In <<< JObject | |
-- | As an example, here's some JSON that we have constructed with our | |
-- | neat new `Fix` type! | |
exampleJSON :: Json | |
exampleJSON | |
= fromObj $ SM.fromFoldable | |
[ Tuple "id" (fromInt 1) | |
, Tuple "name" (fromStr "Katya") | |
, Tuple "friends" | |
( fromList $ fromFoldable | |
[ fromStr "Trixie" | |
, fromStr "Alaska" | |
, fromStr "Ginger" | |
] | |
) | |
, Tuple "height" (fromNum 177.8) | |
] | |
-- | Conversion to JSON is remarkably straightforward; we know how to | |
-- | convert each individual piece, so we just recurse on the `f`, as | |
-- | we know it'll be `JsonF JSON`, which makes this definition pretty | |
-- | neat, in my opinion. | |
toJson :: Json -> String | |
toJson | |
= go <<< unroll | |
where | |
go :: JsonF Json -> String | |
go = case _ of | |
JBool b -> if b then "true" else "false" | |
JInt i -> show i | |
JNull -> "null" | |
JNum n -> show n | |
JStr s -> show s | |
JList xs -> "[" <> joinWith "," (toJson <$> xs) <> "]" | |
JObject os -> "{" <> joinWith "," (prepare <$> SM.toUnfoldable os) <> "}" | |
prepare :: Tuple String Json -> String | |
prepare (Tuple k v) | |
= show k <> ":" <> toJson v | |
-- | Parsing JSON is also pretty straightforward, as we can just go | |
-- | down through the `Mu` levels. There is a little hiccough here: as | |
-- | PureScript is eagerly evaluated, we can't have co-dependence in | |
-- | our `where` functions. So, we'll cover it in `Lazy` and hope for | |
-- | the best, right? Bear in mind that this parser is REALLY naïve: | |
-- | really, we should accountfor whitespace, at the very least. | |
json :: String -> Either String Json | |
json string | |
= parse (force parseJson) string | |
where | |
parseStr :: Parser String String | |
parseStr | |
= (\_ x _ -> x) | |
<$> S.char '"' | |
<*> S.tail | |
<*> S.char '"' | |
parseList :: Lazy (Parser String (Array Json)) | |
parseList | |
= defer \_ -> | |
fromFoldable <$> | |
S.brackets (force parseJson `sepBy` S.char ',') | |
parseObj :: Lazy (Parser String (SM.StrMap Json)) | |
parseObj | |
= defer \_ -> | |
S.braces $ map SM.fromFoldable | |
(force pair `sepBy` S.char ',') | |
where | |
pair :: Lazy (Parser String (Tuple String Json)) | |
pair | |
= defer \_ -> | |
Tuple <$> parseStr | |
<*> (S.char ':' *> force parseJson) | |
parseJson :: Lazy (Parser String Json) | |
parseJson | |
= defer \_ -> map In $ | |
(JBool true <$ S.string "true" ) | |
<|> (JBool false <$ S.string "false") | |
<|> (JNull <$ S.string "null" ) | |
<|> (JNum <$> S.number ) | |
<|> (JInt <$> S.int ) | |
<|> (JStr <$> parseStr ) | |
<|> (JList <$> force parseList ) | |
<|> (JObject <$> force parseObj ) | |
-- | Now we have a notion of parsing and generating JSON based on our | |
-- | little `Mu`, let's see what other interesting fixed points we can | |
-- | compute. When we "diff" two JSON structures, what we'll do is, at | |
-- | each "functor level", store an `Array` of all the unique values | |
-- | that have been there. Basically, this will build up a "diff tree" | |
-- | as we diff several `JsonDiff` values together. | |
type JsonDiff = (Mu (Compose Array JsonF)) | |
-- | A `Json` type is a `JsonDiff` where only one thing ever happened. | |
-- | It's a tenuous explanation, and I'm sorry, but it's good enough! | |
toDiff :: Json -> JsonDiff | |
toDiff (In xs) = (In (Compose [toDiff <$> xs])) | |
-- | Here, we'll just pull out the first thing we ever diffed. If you | |
-- | want to make sure this works, you can hide the constructor, and | |
-- | then safely use `unsafePartial`. | |
fromDiff :: JsonDiff -> Maybe Json | |
fromDiff (In (Compose xs)) = do | |
first <- head xs | |
fixed <- sequence (map fromDiff first) | |
pure $ In fixed | |
-- | JList is an interesting case for diffing, so we'll need to be | |
-- | able to spot one. | |
isJList :: forall f. JsonF f -> Boolean | |
isJList (JList _) = true | |
isJList _ = false | |
-- | Similarly for JObject, too - we'll use some special logic. | |
isJObject :: forall f. JsonF f -> Boolean | |
isJObject (JObject _) = true | |
isJObject _ = false | |
-- | Diff two `JsonDiff` values to produce a `JsonDiff`. If we had | |
-- | bothered to wrap up `JsonDiff` in a `newtype`, this would be a | |
-- | perfectly sensible implementation for `Semimgroup`. Incidentally, | |
-- | The `Monoid` identity would be `In (Compose [])`! | |
diff :: JsonDiff -> JsonDiff -> JsonDiff | |
diff (In (Compose xs)) (In (Compose ys)) | |
= go xs ys | |
where | |
go :: Array (JsonF JsonDiff) -> Array (JsonF JsonDiff) | |
-> JsonDiff | |
go xs' ys' | |
-- If exactly equal, we don't care at all! | |
| xs' == ys' | |
= In (Compose xs') | |
-- If we spot two lists, we'll recursively diff them. | |
| any isJList xs' && any isJList ys' | |
= unsafePartial (fromJust mergeJLists) | |
-- Same goes for objects, of course! | |
| any isJObject xs' && any isJObject ys' | |
= unsafePartial (fromJust mergeJObjects) | |
-- If they're _not_ equal, concat the lists and de-dupe. It's | |
-- not the most efficient approach, but it's good enough for the | |
-- example. | |
| otherwise = In (Compose (nub $ xs' <> ys')) | |
-- How do we merge `JList`s?! | |
mergeJLists :: Maybe JsonDiff | |
mergeJLists = do | |
x <- find isJList xs | |
y <- find isJList ys | |
if x /= y | |
then pure (In (Compose [x, y])) | |
else pure (In (Compose [x])) | |
-- Ok, but what about `JObject`s?! | |
mergeJObjects :: Maybe JsonDiff | |
mergeJObjects = do | |
x <- find isJList xs | |
y <- find isJList ys | |
if x /= y | |
then pure (In (Compose [x, y])) | |
else pure (In (Compose [x])) | |
-- | Before we go, let's print out our `exampleJSON` to prove that I | |
-- | didn't make it all up. | |
main :: forall eff. | |
Eff | |
( console :: CONSOLE | |
, exception :: EXCEPTION | |
, random :: RANDOM | |
| eff | |
) Unit | |
main | |
= do | |
log $ toJson exampleJSON | |
-- While we're here, let's test what we have. The below code generates | |
-- some random test data that we can convert to and from JSON. If the | |
-- before equals the after in all cases, we can pretty safely assume | |
-- that we've won, right? | |
quickCheck \(ArbJson test) -> | |
either (withHelp false) (test === _) | |
$ json $ toJson test | |
newtype ArbJson = ArbJson Json | |
derive instance newtypeArbJson :: Newtype ArbJson _ | |
instance arbitraryJson :: Arbitrary ArbJson where | |
arbitrary = ArbJson <$> do | |
bool <- JBool <$> arbitrary | |
int <- JInt <$> arbitrary | |
num <- JNum <$> arbitrary | |
str <- JStr <$> arbitrary | |
list <- JList <$> (map unwrap <$> (arbitrary :: Gen (Array ArbJson))) | |
object <- JObject <$> SM.fromFoldable | |
<$> (map (map unwrap) <$> | |
(arbitrary :: Gen (Array (Tuple String ArbJson)))) | |
In <$> (elements (JNull :| [bool, int, num, str, list, object])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment