Skip to content

Instantly share code, notes, and snippets.

@aaronlevin
Last active August 30, 2019 02:49
Show Gist options
  • Save aaronlevin/4aa22bd9c79997029167 to your computer and use it in GitHub Desktop.
Save aaronlevin/4aa22bd9c79997029167 to your computer and use it in GitHub Desktop.
Proxy Serialization Post
{-# LANGUAGE OverloadedStrings #-}
module SimpleThings where
import Control.Applicative ((<$>), (<*>))
import Control.Monad (mzero)
import Data.Aeson
import qualified Data.Aeson as A
-- | sum type containing all possible payloads
data Payloads = FooPayload Int String
| InviteRequestPayload String
| IdentityValidationRequestPayload Int Int String
-- | dispatch on the value of `"msg_type"`
instance FromJSON Payloads where
parseJSON (Object v) = v .: "msg_type" >>= handlePayloadType
where
-- handle foo_payload key
handlePayloadType (A.String "foo_payload") =
v .: "data" >>= \sub -> FooPayload <$> sub .: "id" <*> sub .: "msg"
-- handle invite_request key
handlePayloadType (A.String "invite_request") =
v .: "data" >>= \sub -> InviteRequestPayload <$> sub .: "msg"
-- handle identity_validation_data key
handlePayloadType (A.String "identity_validation_data") =
v .: "data" >>= \sub -> IdentityValidationRequestPayload <$> sub .: "from"
<*> sub .: "to"
<*> sub .: "validation_msg"
-- default
handlePayloadType _ = mzero
parseJSON _ = mzero
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
module Proxy where
-- import Control.Applicative ((<$>), (<*>))
import Control.Monad (mzero)
import Data.Aeson
import qualified Data.Aeson as A
import qualified Data.ByteString.Lazy as BL
import Data.Proxy (Proxy(Proxy))
import GHC.TypeLits (symbolVal)
instance ToJSON (Proxy "foo") where
toJSON p = object [ "type" .= symbolVal p ]
instance FromJSON (Proxy "foo") where
parseJSON (Object v) = v .: "type" >>= handleType
where
handleType (A.String "foo") = return (Proxy :: Proxy "foo")
handleType _ = mzero
parseJSON _ = mzero
jsonString :: BL.ByteString
jsonString = "{\"type\": \"foo\"}"
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Proxy where
-- import Control.Applicative ((<$>), (<*>))
import Control.Monad (mzero)
import Data.Aeson
import qualified Data.Aeson as A
import qualified Data.ByteString.Lazy as BL
import Data.Proxy (Proxy(Proxy))
import GHC.TypeLits (KnownSymbol, symbolVal)
import Data.Text (pack)
instance KnownSymbol s => ToJSON (Proxy s) where
toJSON p = object [ "type" .= symbolVal p ]
instance KnownSymbol s => FromJSON (Proxy s) where
parseJSON (Object v) = v .: "type" >>= handleType
where
handleType (A.String s) | s == pack (symbolVal (Proxy :: Proxy s)) = return (Proxy :: Proxy s)
handleType _ = mzero
parseJSON _ = mzero
jsonString :: BL.ByteString
jsonString = "{\"type\": \"foo\"}"
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Proxy where
import Control.Applicative ((<$>))
import Control.Monad (mzero)
import Data.Aeson
import Data.Aeson.Types
import qualified Data.Aeson as A
import qualified Data.ByteString.Lazy as BL
import Data.Monoid ((<>))
import Data.Proxy (Proxy(Proxy))
import GHC.TypeLits (KnownSymbol, Symbol, symbolVal)
import Data.Text (pack)
-- | Instances for serializing Proxy
instance KnownSymbol s => ToJSON (Proxy s) where
toJSON = A.String . pack . symbolVal
instance KnownSymbol s => FromJSON (Proxy s) where
parseJSON (A.String s) | s == pack (symbolVal (Proxy :: Proxy s)) = return (Proxy :: Proxy s)
parseJSON _ = mzero
-- | our new data type.
newtype Payload (s :: Symbol) a = Payload a
-- | ToJSON instance
instance (KnownSymbol s, ToJSON a) => ToJSON (Payload s a) where
toJSON (Payload a) = object [ "type" .= (Proxy :: Proxy s)
, "data" .= a
]
-- | FromJSON instance
instance (KnownSymbol s, FromJSON a) => FromJSON (Payload s a) where
parseJSON (Object v) = (v .: "type" :: Parser (Proxy s))
>>
Payload <$> v .: "data"
parseJSON _ = mzero
-- | Show intance for ghci
instance (KnownSymbol s, Show a) => Show (Payload s a) where
show (Payload a) = "Payload " <> symbolVal (Proxy :: Proxy s) <> " " <> show a
jsonString :: BL.ByteString
jsonString = "{\"type\": \"String\", \"data\": \"cool\"}"
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Proxy where
import Control.Applicative ((<$>))
import Control.Monad (mzero)
import Data.Aeson
import Data.Aeson.Types
import qualified Data.Aeson as A
import qualified Data.ByteString.Lazy as BL
import Data.Monoid ((<>))
import Data.Proxy (Proxy(Proxy))
import GHC.TypeLits (KnownSymbol, Symbol, symbolVal)
import Data.Text (pack)
type family TypeKey (a :: *) :: Symbol where
TypeKey Int = "int"
TypeKey String = "string"
-- | Instances for serializing Proxy
instance KnownSymbol s => ToJSON (Proxy s) where
toJSON = A.String . pack . symbolVal
instance KnownSymbol s => FromJSON (Proxy s) where
parseJSON (A.String s) | s == pack (symbolVal (Proxy :: Proxy s)) = return (Proxy :: Proxy s)
parseJSON _ = mzero
-- | our new data type.
data Payload (s :: Symbol) a :: * where
Payload :: a
-> Payload (TypeKey a) a
-- | ToJSON instance
instance (s ~ TypeKey a, KnownSymbol s, ToJSON a) => ToJSON (Payload s a) where
toJSON (Payload a) = object [ "type" .= (Proxy :: Proxy s)
, "data" .= a
]
-- | FromJSON instance
instance (s ~ TypeKey a, KnownSymbol s, FromJSON a) => FromJSON (Payload s a) where
parseJSON (Object v) = (v .: "type" :: Parser (Proxy s))
>>
Payload <$> v .: "data"
parseJSON _ = mzero
-- | Show intance for ghci
instance (KnownSymbol s, Show a) => Show (Payload s a) where
show (Payload a) = "Payload " <> symbolVal (Proxy :: Proxy s) <> " " <> show a
jsonString :: BL.ByteString
jsonString = "{\"type\": \"string\", \"data\": \"cool\"}"
x :: Payload "int" Int
x = Payload 10
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
module Proxy where
import Control.Applicative ((<$>))
import Control.Monad (mzero)
import Data.Aeson
import Data.Aeson.Types
import qualified Data.Aeson as A
import qualified Data.ByteString.Lazy as BL
import Data.Monoid ((<>))
import Data.Proxy (Proxy(Proxy))
import GHC.TypeLits (KnownSymbol, Symbol, symbolVal)
import Data.Text (pack)
type family TypeKey (a :: *) :: Symbol where
TypeKey Int = "int"
TypeKey String = "string"
-- | Instances for serializing Proxy
instance KnownSymbol s => ToJSON (Proxy s) where
toJSON = A.String . pack . symbolVal
instance KnownSymbol s => FromJSON (Proxy s) where
parseJSON (A.String s) | s == pack (symbolVal (Proxy :: Proxy s)) = return (Proxy :: Proxy s)
parseJSON _ = mzero
-- | our new data type.
data Payload (s :: Symbol) a :: * where
Payload :: a
-> Payload (TypeKey a) a
-- | ToJSON instance
instance (s ~ TypeKey a, KnownSymbol s, ToJSON a) => ToJSON (Payload s a) where
toJSON (Payload a) = object [ "type" .= (Proxy :: Proxy s)
, "data" .= a
]
-- | FromJSON instance
instance (s ~ TypeKey a, KnownSymbol s, FromJSON a) => FromJSON (Payload s a) where
parseJSON (Object v) = (v .: "type" :: Parser (Proxy s))
>>
Payload <$> v .: "data"
parseJSON _ = mzero
-- | Show intance for ghci
instance (KnownSymbol s, Show a) => Show (Payload s a) where
show (Payload a) = "Payload " <> symbolVal (Proxy :: Proxy s) <> " " <> show a
jsonString :: BL.ByteString
jsonString = "{\"type\": \"string\", \"data\": \"cool\"}"
x :: Payload "int" Int
x = Payload 10
data Message a where
Message :: (s ~ TypeKey a, KnownSymbol s)
=> Payload s a
-> Message a
instance ToJSON a => ToJSON (Message a) where
toJSON (Message payload) = object [ "payload" .= payload ]
instance (s ~ TypeKey a, KnownSymbol s, FromJSON a) => FromJSON (Message a) where
parseJSON (Object v) = Message <$> v .: "payload"
parseJSON _ = mzero
instance Show a => Show (Message a) where
show (Message p) = "Message ( " <> show p <> " )"
messageStringA :: BL.ByteString
messageStringA = "{ \"payload\": {\"type\": \"string\", \"data\": \"cool\"} }"
messageStringB :: BL.ByteString
messageStringB = "{ \"payload\": {\"type\": \"string\", \"data\": 10} }"
y :: Message Int
y = Message (Payload 10 :: Payload "int" Int)
data Env a = Env { m :: Message a }
instance FromJSON (Message a) => FromJSON (Env a) where
parseJSON (Object v) = Env <$> v .: "envelope"
parseJSON _ = mzero

Encoding Types in your JSON Strings

yo dawg, I heard you like strings in your types so I put a type in your string so you could type check your strings while you stringify your types - Proxy "XZibit"

The saying goes that one should encode as many invariants in the type system as possible. This way bad programs don't type check, thereby affording you hours (previously spent writing unit tests) to meander in the warm embrace of nihilism. With that in mind I was faced with the following problem: at work we have a json-encoded message object that has generic payloads. However, each payload should contain the keys "type" and "data" where "type" contains a string-encoded value to indicate how to route or parse "data". For example:

{ "from": "you"
, "to"  : "me"
, "payload" : { "type" : "identity_validation_request"
              , "data"     : { ... IdentityValidationData data ... }
              }
}

As time withers and our corporeal bodies float aimlessly through this cold, meaningless universe, the possible values of "type" (in this example: "identity_validation_request") will increase and be littered throughout our codebase as we add various types of payload. These values are a great example of invariants that should be encoded somewhere in our type system. But how?

The goal of this post is to create a data type that encodes our "type" value as a type-level string, holds this type-level string in its type, and succesfully parses a json encoded string if the "type" value in some json matches the type-level string. It should behave something like this:

-- | `s` will hold the value of our type, `a` is the data type of the payload.
data Payload s a

-- | creating a function like this is the goal
parse :: ByteString -> Maybe (Payload s a)

-- | here is some an example json to parse
jsonB :: ByteString
jsonB = "{\"type\" : \"identity_validation_request\", \"data\": ... }"

-- | itWorks should evaluate to the value
-- 'Just (Payload "identity_validation_request" IdentityValidationData)'
itWorks = decode jsonB :: Maybe (Payload "identity_validation_request" IdentityValidationData)

-- | doesNotParse should evaluate to 'Nothing' as
-- the type level string "xxx" does not match "identity_validation_request
doesNotParse = decode jsonB :: Maybe (Payload "xxxx" IdentityValidationData)

Additionally, we will strive to maintain a global index of (type-level string, type) pairs using a type family, and also provide a simple, polymorphic container for clients to use.

To get there, we will:

  1. serialize and de-serialize Proxy values of type Proxy (s :: Symbol).
  2. serialize and de-serialize a Payload (s :: Symbol) (a :: *) datatype to associcate arbitrary payloads with type-level strings.
  3. introduce the TypeKey type family to maintain a global index of types and their assumed keys.
  4. serialize and de-serialize values of type Payload (TypeKey a) a.
  5. serialize and de-serialize values of type Message a, a polymorphic wrapper around Payload (TypeKey a) a, creating a nice interface for our clients.
  6. show that Message a satisfies all our requirements.

table of contents:

  1. Sum Types: The Simplest Thing That Works
  2. (De)Serializing Data.Proxy
  3. A More General DeSerialization of Data.Proxy
  4. Stringly-Typed Programming for the Masses
  5. A Global Index of Your String Types
  6. Polymorphic Containers
  7. Conclusion

1. Sum Types: The Simplest Thing That Works

code: https://gist.github.com/aaronlevin/4aa22bd9c79997029167#file-01-simplest-thing-that-works-hs

Before we dive into type-level tomfoolery, let's create the simplest thing that works. The goal is to dispatch based on the key in "type". There are a few ways of doing this. We could forgoe any sense of type-safety and write FromJSON instances that inspect "type" willy-nilly. However, to bring some sanity to our codebase, let's use a basic sum type called Payloads which contain all possible payloads. This will force us to put all "type" string assumptions in one place.

{-# LANGUAGE OverloadedStrings #-}

module SimpleThings where

import Control.Applicative ((<$>), (<*>))
import Control.Monad (mzero)
import Data.Aeson
import qualified Data.Aeson as A

-- | sum type containing all possible payloads
data Payloads = FooPayload Int String
              | InviteRequestPayload String
              | IdentityValidationRequestPayload Int Int String

-- | dispatch on the value of `"type"`
instance FromJSON Payloads where
    parseJSON (Object v) = v .: "type" >>= handlePayloadType
          where
            -- handle foo_payload key
            handlePayloadType (A.String "foo_payload") =
                v .: "data" >>= \sub -> FooPayload <$> sub .: "id" <*> sub .: "msg"

            -- handle invite_request key
            handlePayloadType (A.String "invite_request") =
                v .: "data" >>= \sub -> InviteRequestPayload <$> sub .: "msg"

            -- handle identity_validation_data key
            handlePayloadType (A.String "identity_validation_data") =
                v .: "data" >>= \sub -> IdentityValidationRequestPayload <$> sub .: "from"
                                                                         <*> sub .: "to"
                                                                         <*> sub .: "validation_msg"

            -- default
            handlePayloadType _ = mzero
    parseJSON _                 = mzero

In this design we are expecting a json structure that looks like: {"type" : "...", "data": { ... }}. To parse this we inspect the value of the "type" key and dispatch accordingly. This approach is simple and forces us to keep the assumed json keys in one place. However, we loose lots of generality. This solution is not polymorphic (i.e. we aren't working with an arbitrary, abstract container) and is prone to the expression problem (add a new payload, we now have to dispatch in a bunch of places).

To improve on this we want a data type that looks like:

data Payload a = Payload { payload :: a }

How and where do we encode the "type" string in this polymorphic container? Note that we can no longer dispatch over the "type" value anymore and return different types of a when writing our FromJSON instance (unless we wanted to forgoe something that looked like intance FromJSON a => FromJSON (Payload a) and instead write FromJSON (Payload SomeType) for each possible payload.

To resolve this issue we're going to take a detour into Data.Proxy, a data type that will help us pass around values that encode type-level information.

2. Serializing Data.Proxy

code: https://gist.github.com/aaronlevin/4aa22bd9c79997029167#file-02-data-proxy-hs

Data.Proxy is a great example of Haskell's expressive type system. It's an incredibly simple and essential data type in Haskell's type-level swiss army knife. It's definition:

data Proxy a = Proxy

Proxy is useful whenever you need value-level representations of information at the type level. Note that a can be of any kind, specifically, it can be of kind Symbol, which means that Proxy "foo" is a valid type (with the DataKinds extension enabled). Haskell also exposes some support for transitioning from the Symbol kind to a value of type String:

symbolVal :: KnownSymbol s => Proxy s -> String
$ ghci
Prelude> import GHC.TypeLits
Prelude GHC.TypeLits> import Data.Proxy
Prelude GHC.TypeLits Data.Proxy> :set -XDataKinds
Prelude GHC.TypeLits Data.Proxy> symbolVal (Proxy :: Proxy "foo")
"foo"

Armed with Proxy and symbolVal we can now attempt to serialize and de-serialize JSON into Proxy "foo". The ToJSON instance is pretty straight forward:

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}

module Proxy where

import Control.Monad (mzero)
import Data.Aeson
import Data.Proxy (Proxy(Proxy))
import GHC.TypeLits (KnownSymbol, symbolVal)

instance ToJSON (Proxy "foo") where
    toJSON p = object [ "type" .= symbolVal p ]

This will serialize Proxy :: Proxy "foo" into {"type": "foo"}.

Now, let's write a FromJSON instance for Proxy "foo":

instance FromJSON (Proxy "foo") where
    parseJSON (Object v) = v .: "type" >>= handleType
      where
        handleType (A.String "foo") = return (Proxy :: Proxy "foo")
        handleType _ = mzero
    parseJSON _      = mzero

jsonString :: BL.ByteString
jsonString = "{\"type\": \"foo\"}"

You can see this work in action by loading the code in ghci:

$ ghci
Prelude> :load 02-data-proxy.hs
[1 of 1] Compiling Proxy            ( 02-data-proxy.hs, interpreted )
Ok, modules loaded: Proxy.
*Proxy> :set -XDataKinds
*Proxy> decode jsonString :: Maybe (Proxy "foo")
Just Proxy
*Proxy> decode jsonString :: Maybe (Proxy "bar")

<interactive>:5:1:
    No instance for (FromJSON (Proxy "bar"))
      arising from a use of ‘decode’
    In the expression: decode jsonString :: Maybe (Proxy "bar")
    In an equation for ‘it’:
        it = decode jsonString :: Maybe (Proxy "bar")

Awesome! This works for Proxy "foo" and we get a compiler error when trying to deserializing into Proxy "bar".

3. A More General DeSerialization of Data.Proxy

code: https://gist.github.com/aaronlevin/4aa22bd9c79997029167#file-03-more-general-proxy-hs

Obviously we don't want to write a FromJSON instance for Proxy "bar" and every other type-level string that might appear. If you write the naive FromJSON instance you'll hit a wall:

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}

module Proxy where

import Control.Monad (mzero)
import Data.Aeson
import qualified Data.Aeson as A
import qualified Data.ByteString.Lazy as BL
import Data.Proxy (Proxy(Proxy))
import GHC.TypeLits (KnownSymbol, symbolVal)
import Data.Text (pack)

instance KnownSymbol s => ToJSON (Proxy s) where
    toJSON p = object [ "type" .= symbolVal p ]

instance KnownSymbol s => FromJSON (Proxy s) where
    parseJSON (Object v) = v .: "type" >>= handleType
      where
        handleType (A.String s) | s == pack (symbolVal (Proxy :: Proxy s)) = return (Proxy :: Proxy s)
        handleType _ = mzero
    parseJSON _      = mzero


jsonString :: BL.ByteString
jsonString = "{\"type\": \"foo\"}"

This intance naively checks if the value corresponding to the "type" key matches the symbolVal of the Proxy and if they match, it returns a Proxy of the correct type.

Unfortunately, if you compile this you will get the following error:

03-more-general-proxy.hs:21:44:
    Couldn't match kind ‘*’ with ‘GHC.TypeLits.Symbol’
    Expected type: Value
                   -> aeson-0.8.0.2:Data.Aeson.Types.Internal.Parser (Proxy s)
      Actual type: Value
                   -> aeson-0.8.0.2:Data.Aeson.Types.Internal.Parser (Proxy s0)
    In the second argument of ‘(>>=)’, namely ‘handleType’
    In the expression: v .: "type" >>= handleType

The problem is that GHC differentiates between the variables that appear in a type signature from the variables that appear in a function's definition. Therefore, the s in Proxy s in the type signature is different from the s in (Proxy :: Proxy s) appearing in the definition. To resolve this, we can enable the ScopedTypeVariables extension, which will extend the scope of a type variable throughout the function definition. This will allow GHC to infer that s satisfies the KnownSymbol constraint and compile. Adding {-# LANGUAGE ScopedTypeVariables #-} to our list of extensions and loading our code into ghci:

$ ghci
Prelude> :load 03-more-general-proxy.hs
[1 of 1] Compiling Proxy            ( 03-more-general-proxy.hs, interpreted )
Ok, modules loaded: Proxy.
*Proxy> :set -XDataKinds
*Proxy> decode jsonString :: Maybe (Proxy "foo")
Just Proxy

*Proxy> decode jsonString :: Maybe (Proxy "bar")
Nothing

*Proxy> import Data.ByteString.Lazy
*Proxy Data.ByteString.Lazy> :set -XOverloadedStrings
*Proxy Data.ByteString.Lazy> let otherString = "{\"type\":\"bar\"}" :: ByteString
*Proxy Data.ByteString.Lazy> decode otherString :: Maybe (Proxy "bar")
Just Proxy

4. Stringly-Typed Programming for the Masses

code: https://gist.github.com/aaronlevin/4aa22bd9c79997029167#file-04-stringly-typed-programming-hs

Let's now try to accomplish our original goal: to create a data type Payload s a were s is a type-level string representing the value we expect in the json key "type". This will reuire one additional extension (KindSignatures). We'll also be updating our ToJSON and FromJSON instanes for Proxy to look for specific strings as opposed to a full json object (this will simplify the Payload serialization). Preamble:

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Proxy where

import Control.Applicative ((<$>))
import Control.Monad (mzero)
import Data.Aeson
import Data.Aeson.Types
import qualified Data.Aeson as A
import qualified Data.ByteString.Lazy as BL
import Data.Monoid ((<>))
import Data.Proxy (Proxy(Proxy))
import GHC.TypeLits (KnownSymbol, Symbol, symbolVal)
import Data.Text (pack)

-- | Instances for serializing Proxy
instance KnownSymbol s => ToJSON (Proxy s) where
    toJSON = A.String . pack . symbolVal

instance KnownSymbol s => FromJSON (Proxy s) where
    parseJSON (A.String s) | s == pack (symbolVal (Proxy :: Proxy s)) = return (Proxy :: Proxy s)
    parseJSON _      = mzero

And now for our Payload s a data type with its ToJSON/FromJSON instances. Note that in the FromJSON instance we first attempt to deserialize into Proxy s and if successful we discard the result and parse the rest of the payload.

-- | our new data type.
newtype Payload (s :: Symbol) a = Payload a

-- | ToJSON instance
instance (KnownSymbol s, ToJSON a) => ToJSON (Payload s a) where
    toJSON (Payload a) = object [ "type" .= (Proxy :: Proxy s)
                                , "data" .= a
                                ]

-- | FromJSON instance
instance (KnownSymbol s, FromJSON a) => FromJSON (Payload s a) where
    parseJSON (Object v) = (v .: "type" :: Parser (Proxy s))
                           >>
                           Payload <$> v .: "data"
    parseJSON _          = mzero

-- | Show intance for ghci
instance (KnownSymbol s, Show a) => Show (Payload s a) where
    show (Payload a) = "Payload " <> symbolVal (Proxy :: Proxy s) <> " " <> show a

jsonString :: BL.ByteString
jsonString = "{\"type\": \"String\", \"data\": \"cool\"}"

Now, if we load this in ghci we should see the following:

$ ghci
Prelude> :set -XDataKinds
Prelude> :load 04-stringly-typed-programming.hs
[1 of 1] Compiling Proxy            ( 04-stringly-typed-programming.hs, interpreted )
Ok, modules loaded: Proxy.

*Proxy> decode jsonString :: Maybe (Payload "String" String)
Just Payload String "cool"

*Proxy> decode jsonString :: Maybe (Payload "Int" String)
Nothing

*Proxy> decode jsonString :: Maybe (Payload "String" Int)
Nothing

*Proxy> let x = Payload 10 :: Payload "My Very Special Integer" Int
*Proxy> encode x
"{\"data\":10,\"type\":\"My Very Special Integer\"}"

This is exactly what we want! We're able to specify in the Payload type exactly the value of the "type" key we expect. You might think this feature is somewhat pedantic, and I hope to dispell the myth in the next section, but consider how much more readable code using Payload becomes. If you have a REST endpoint deserializing this Payload (or any other type you encode parameters into the type) you will see, immediately, the assumptions being made simply by analyzing the type (e.g. a function returning Payload "invite_request" InviteRequest).

5. A Global Index of Your String Types

code: https://gist.github.com/aaronlevin/4aa22bd9c79997029167#file-05-indexing-your-keys-hs

All is good in the land of types and strings but we'd be remiss to wontonly throw strings in our types and hope for the best. What would be really nice is the following:

  1. A global index of keys/type-level strings and their corresponding type.
  2. A compile-time error when you make a bad assumption about a key and its type.

These two can be accomplished with a closed type family that will serve as our index and a few simple modifications to Payload s a.

We begin with a simple, closed type family, requiring the TypeFamilies exension:

type family TypeKey (a :: *) :: Symbol where
    TypeIndex Int = "int"
    TypeIndex String = "string"
    -- other types you have

To incorporate this type family we need to update our Payload s a data type to use a Generalized Algebraic Data Type, requiring the GADTs extension:

data Payload (s :: Symbol) a :: * where
    Payload :: a
            -> Payload (TypeKey a) a

To write our ToJSON/FromJSON instances we will need to take advantage of equality constraints to work around the limitations of haskell's type-level computations. Ideally we'd like to write instance ToJSON (ToJSON a, KnownSymbol (TypeKey a)) => ToJSON (Payload (TypeKey a) a), stating that if there is a ToJSON instance for a and the TypeKey mapping on a results in a known symbol, then we can write a ToJSON instance for Payload. Unfortunately doing so will result in a compiler error that looks like:

05-indexing-your-keys.hs|50 col 28 error| Could not deduce (s ~ Proxy.TypeKey a)
|| from the context (GHC.TypeLits.KnownSymbol (Proxy.TypeKey a),
||                   aeson-0.8.0.2:Data.Aeson.Types.Class.FromJSON a)
||   bound by the instance declaration
||   at /home/aterica/dev/tmp/blogpost/05-indexing-your-keys.hs:47:10-72
||   ‘s’ is a rigid type variable bound by
||       the instance declaration
||       at /home/aterica/dev/tmp/blogpost/05-indexing-your-keys.hs:47:10
|| Expected type: a -> Proxy.Payload s a
||   Actual type: a -> Proxy.Payload (Proxy.TypeKey a) a

We can work around this by using the equality constraint s ~ TypeKey a hinted to us by GHC.

-- | ToJSON instance
instance (s ~ TypeKey a, KnownSymbol s, ToJSON a) => ToJSON (Payload s a) where
    toJSON (Payload a) = object [ "type" .= (Proxy :: Proxy s)
                                , "data" .= a
                                ]

-- | FromJSON instance
instance (s ~ TypeKey a, KnownSymbol s, FromJSON a) => FromJSON (Payload s a) where
    parseJSON (Object v) = (v .: "type" :: Parser (Proxy s))
                           >>
                           Payload <$> v .: "data"
    parseJSON _          = mzero

-- | Show intance for ghci
instance (KnownSymbol s, Show a) => Show (Payload s a) where
    show (Payload a) = "Payload " <> symbolVal (Proxy :: Proxy s) <> " " <> show a

jsonString :: BL.ByteString
jsonString = "{\"type\": \"string\", \"data\": \"cool\"}"

x :: Payload "int" Int
x = Payload 10

Here, (s ~ TypeKey a, KnownSymbol s, ToJSON a) should read as: if s is constrained to be equal to TypeKey a (i.e. s is a type of kind Symbol) and s is also a KnownSymbol then we can reate a ToJSON instance for Payload s a.

Loading up ghci, we should see that trying to compile Payload "string" String should pass, while Payload "int" String should fail (because TypeKey String was defined to be "string" not "int"):

$ ghci

Prelude> :set -XDataKinds
Prelude> :load 05-indexing-your-keys.hs 

*Proxy> decode jsonString :: Maybe (Payload "string" String)
Just Payload string "cool"

*Proxy> decode jsonString :: Maybe (Payload "int" String)

<interactive>:5:1:
    Couldn't match type ‘"string"’ with ‘"int"’
    In the expression:
        decode jsonString :: Maybe (Payload "int" String)
    In an equation for ‘it’:
        it = decode jsonString :: Maybe (Payload "int" String)

As expected our TypeKey type family will ensure that we get a compile error if we assume the wrong key for a specific type!

Oh, but we are not yet done!

6. Polymorphic Containers

code: https://gist.github.com/aaronlevin/4aa22bd9c79997029167#file-06-polymorphic-containers-hs

Now, you might be thinking: ok, ok, ok, I can put my assumptions in the type, but really I don't want to specify these keys everywhere, I just want to keep this global index for a reference. So, you want a simple, polymorphic container that hides the underlying type-level machinery? I claim that with the help of a new GADT and a scary extension (UndecidableInstances) we can do this.

Here is our polymorphic container:

data Message a where
    Message :: (s ~ TypeKey a, KnownSymbol s)
            => Payload s a
            -> Message a

-- | ToJSON instance which serializes a message's payload
instance ToJSON a => ToJSON (Message a) where
    toJSON (Message payload) = object [ "payload" .= payload ]

-- | FromJSON instance
instance (s ~ TypeKey a, KnownSymbol s, FromJSON a) => FromJSON (Message a) where
    parseJSON (Object v) = Message <$> v .: "payload"
    parseJSON _ = mzero

instance Show a => Show (Message a) where
    show (Message p) = "Message ( " <> show p <> " )"

The Message a data type simply wraps the Payload s a for us, hiding the ugly deails from the client. Nevertheless, it behaves exactly as we'd expect. Consider the following ghci session:

$ ghci
Prelude> :set -XOverloadedStrings
Prelude> :load 06-polymorphic-containers.hs 

*Proxy> let message = "{ \"payload\": {\"type\": \"string\", \"data\": \"cool\"} }" :: Data.ByteString.Lazy.ByteString

*Proxy> decode message :: Maybe (Message String)
Just Message ( Payload string "cool" )

*Proxy> decode message :: Maybe (Message Int)
Nothing

*Proxy> Message (Payload 420) :: Message Int
Message ( Payload int 420 )

*Proxy> Message (Payload "420") :: Message String
Message ( Payload string "420" )

*Proxy> data Cool = Cool

*Proxy> Message (Payload Cool) :: Message Cool
<interactive>:15:1:
    No instance for (KnownSymbol (TypeKey Cool))
      arising from a use of ‘Message’
    In the expression: Message (Payload Cool) :: Message Cool
    In an equation for ‘it’:
        it = Message (Payload Cool) :: Message Cool

As you can see, Message a has he desired behaviour:

  • we can deserialize strings only if the "type" key has the right value.
  • the value of the "type" key, and thus the type-level string needed on our Payload s a type is not exposed to clients using Message.
  • if we try to create a Message with a type not indexed in our closed type family TypeKey, we get an error (e.g. Message (Payload Cool) :: Message Cool did not compile).

While this last part required a scary extension, it's somewhat safe to be used in this context.

Conclusion

To recap what we've accomplised so far, let's recall what we set out to do. We encountered a situation where we wanted to deserialize some JSON that required us to dispatch on a specific value of a json key ("type") and, based on that value, attempt to parse the JSON into a specific type. We discussed several attempts:

  1. Ad-hoc
  2. Using a sum type
  3. Encoding the expeted value of "type" in a type-level string

We spent most of the time exploring the last option. We were able to:

  1. Serialize and de-serialize Proxy values of type Proxy (s :: Symbol). This allowed us to encode the "type" value as a type-level string in the proxy.
  2. using 1 we created a Payload (s :: Symbol) (a :: *) datatype to associcate arbitrary payloads with type-level strings.
  3. we showed you could serialize and de-serialize values of type Payload s a.
  4. we then created a global index of types and the assumed keys they would have by using the type family TypeKey.
  5. using 4 we were able to serialize and de-serialize values of type Payload (TypeKey a) a, encoding our json-key assumptions at compile time in a global, unique index.
  6. we then introduced a Message a datatype that wrapped our Payload (TypeKey a) a, creating a nice interface for our clients.
  7. finally, we avoid run-time errors based on bad assumptions by having the compiler throw an error if we try to deserialize an instnae of Message a where a has no entry in our TypeKey type family index.

There is a lot more you can do with these ideas. I hope this (lengthy) post inspires you to try enoding more invariants in the type system. For further inspiration, I recommend trying to grok the reflection library, which takes the idea of encoding information within types to the next level.

May all your strings be well-typed!

{ nixpkgs ? (import <nixpkgs> {}) }:
let
myEnvFun = nixpkgs.myEnvFun;
haskell = nixpkgs.haskellPackages;
in rec {
devEnv = myEnvFun {
name = "tmp-crud";
buildInputs = [
haskell.aeson
haskell.ghc
haskell.ghcMod
];
};
}
@cyrbon
Copy link

cyrbon commented Oct 10, 2016

Great post! On GHC 8.0.1 you would also need {-# LANGUAGE OverlappingInstances #-} or it will complain about "Overlapping instances" for ToJSON FromJSON. Overlapping instances are getting depricated, so a new way of doing it without {-# LANGUAGE OverlappingInstances #-} would be like this:

-- | Instances for serializing Proxy
instance {-# OVERLAPPING #-} KnownSymbol s => ToJSON (Proxy s) where
    toJSON = A.String . pack . symbolVal

instance {-# OVERLAPPING #-} KnownSymbol s => FromJSON (Proxy s) where
    parseJSON (A.String s) | s == pack (symbolVal (Proxy :: Proxy s)) = return (Proxy :: Proxy s)
    parseJSON _      = mzero

-- | our new data type.
newtype Payload (s :: Symbol) a = Payload a

-- | ToJSON instance
instance {-# OVERLAPPABLE #-} (KnownSymbol s, ToJSON a) => ToJSON (Payload s a) where
    toJSON (Payload a) = object [ "type" .= (Proxy :: Proxy s)
                                , "data" .= a
                                ]

-- | FromJSON instance
instance {-# OVERLAPPABLE #-} (KnownSymbol s, FromJSON a) => FromJSON (Payload s a) where
    parseJSON (Object v) = (v .: "type" :: Parser (Proxy s))
                           >>
                           Payload <$> v .: "data"
    parseJSON _          = mzero

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment