Last active
October 22, 2016 11:55
-
-
Save madidier/aae3b276e6044ce5eeef713dd101d0c7 to your computer and use it in GitHub Desktop.
A general DSL for consuming structured tabular data (i.e. wide CSVs, XLSXs...)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE BangPatterns #-} | |
{-# LANGUAGE GADTs #-} | |
{-# LANGUAGE LambdaCase #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
module Tabulata | |
( -- * Types | |
Fields | |
, Range | |
-- * DSL primitives | |
, rawField | |
, validate | |
, validatePure | |
-- * DSL evaluation | |
, extractHeader | |
, parseRow | |
) where | |
import Control.Applicative.Free.Final (Ap, liftAp, runAp, runAp_) | |
import Control.Monad.Reader (ReaderT, ask, runReaderT) | |
import Control.Monad.State (StateT, evalStateT, get, put) | |
import Control.Monad.Trans (lift) | |
import Data.Function ((&)) | |
import Data.Functor.Compose (Compose(..)) | |
import Data.Semigroup (Semigroup) | |
import Data.Text (Text) | |
import Data.Validation (AccValidation(..)) | |
-- See also: https://gist.github.com/madidier/05e780651ec38dd3c5632fcfc0a92990 | |
-- | Represents a structured set of fields | |
-- | |
-- The type arguments represent the following : | |
-- - m is the evaluation context in which the validation functions will run | |
-- (i.e., you can use that to give validation functions access to a database) | |
-- - e is the type of validation error summaries (they must be concatenable) | |
-- - n is the type of column names | |
-- - p is the type of row indexes | |
-- - c is the type of the cells' contents | |
-- - a is the type of the value obtained from a row or a substructure in a row | |
-- | |
-- From these types, the interpreter will only know : | |
-- - How to stitch together computations in the m evaluation context | |
-- (it "asks" for a Monad instance). | |
-- - How to combine/concat error messages (it "asks" for a Semigroup instance) | |
type Fields m e n p c = Ap (Field m e n p c) | |
-- | Represents a range of cells | |
data Range p | |
= Range | |
{ rangeRow :: p | |
, rangeStart :: Int | |
, rangeEnd :: Int | |
} deriving (Show, Eq) | |
data Field m e n p c a where | |
GetRawField :: n -> Field m e n p c c | |
Validate :: Fields m e n p c (Range p -> m (AccValidation e a)) -> Field m e n p c a | |
-- | A simple, stringly type field with the given name | |
rawField :: n -> Fields m e n p c c | |
rawField name = liftAp (GetRawField name) | |
-- | Performs validation on a substructure | |
validate :: Fields m e n p c (Range p -> m (AccValidation e a)) -> Fields m e n p c a | |
validate fields = liftAp (Validate fields) | |
-- | Uses a pure validation function on a substructure | |
validatePure :: Monad m => Fields m e n p c (Range p -> AccValidation e a) -> Fields m e n p c a | |
validatePure fields = | |
validate $ (\res -> (\range -> return (res range))) <$> fields | |
-- | Retrieves the fields names from a definition | |
extractHeader :: Fields m e n p c a -> [n] | |
extractHeader = runAp_ | |
(\case | |
GetRawField name -> [name] | |
Validate f -> extractHeader f | |
) | |
-- | Extracts data from a single row | |
parseRow :: (Monad m, Semigroup e) | |
=> Fields m e n p c a -- The DSL expression to run | |
-> p -- The current row's position | |
-> [c] -- The row's contents | |
-> AccValidation e c -- The value to use in case a row ended prematurely | |
-> m (AccValidation e a) | |
parseRow definition rowNo contents overflowValue | |
= parseRow' definition | |
& getCompose | |
& flip runReaderT (rowNo, overflowValue) | |
& flip evalStateT (contents, 0) | |
type FIELDS m e p c a | |
= Compose (ReaderT (p, AccValidation e c) | |
(StateT ([c], Int) m)) | |
(AccValidation e) a | |
parseRow' :: (Monad m, Semigroup e) => Fields m e n p c a -> FIELDS m e p c a | |
parseRow' = runAp | |
(\case | |
GetRawField name -> Compose $ do | |
(xs, !n) <- get | |
(_, overflowValue) <- ask | |
case xs of | |
x : xs -> do { put (xs, n + 1); return (AccSuccess x) } | |
[] -> do { put ([], n + 1); return overflowValue } | |
Validate fields -> Compose $ do | |
(_, rangeStart) <- get | |
res <- getCompose (parseRow' fields) | |
(_, rangeEnd) <- get | |
(currentRow, _) <- ask | |
case res of | |
AccSuccess validate -> lift . lift . validate $ Range currentRow rangeStart rangeEnd | |
AccFailure err -> return (AccFailure err) | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment