Skip to content

Instantly share code, notes, and snippets.

@arademaker
Created October 31, 2020 20:59
Show Gist options
  • Save arademaker/e7ae2e1ef5225807a3e627caa61481da to your computer and use it in GitHub Desktop.
Save arademaker/e7ae2e1ef5225807a3e627caa61481da to your computer and use it in GitHub Desktop.
Merge.hs
{-# LANGUAGE DeriveGeneric, OverloadedStrings #-}
import Data.List
import GHC.Generics
import System.Environment
import System.Exit (exitFailure)
import System.IO

import Data.Aeson
import qualified Data.ByteString.Lazy as B
import Data.Text
import System.FilePath.Posix
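{-
Goal: read two JSON files; align their sentences; align the tokens of
each sentence pair using their spans (b,e); and find the root of each
analysis, in order to list every text together with both roots,
indicating match vs. mismatch.

Only the first step is implemented in this file so far: one JSON file is
decoded and the result is printed.
-}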
-- | One token of a sentence, as decoded from the input JSON.
--
-- The aeson instances of this type are 'Generic'-derived, so (with aeson's
-- default options) the field names below double as the JSON object keys;
-- renaming a field changes the input that is accepted.
--
-- Every field is strict. The 'Int' fields used to be lazy while the 'Text'
-- fields were already strict; a decoded token has nothing worth deferring,
-- and strict fields avoid retaining thunks.
--
-- The fields 'id' and 'head' share their names with Prelude functions, so
-- an unqualified use of either name inside this module is ambiguous.
--
-- NOTE(review): the first ten fields look like the CoNLL-U columns, and
-- 'cfrom' / 'cto' like the span bounds (b,e) mentioned in the file header
-- comment — confirm against whatever produces the JSON.
data Token = Token
  { id      :: !Int
  , form    :: !Text
  , lemma   :: !Text
  , upostag :: !Text
  , xpostag :: !Text
  , feats   :: !Text
  , head    :: !Int
  , deprel  :: !Text
  , deps    :: !Text
  , misc    :: !Text
  , cfrom   :: !Int
  , cto     :: !Int
  , lineno  :: !Int
  } deriving (Show, Generic)
-- | One sentence as decoded from the input JSON.
--
-- The aeson instances of this type are 'Generic'-derived, so (with aeson's
-- default options) the field names double as the JSON object keys;
-- renaming a field changes the input that is accepted.
data Sentence = Sentence
  { text    :: !Text    -- ^ NOTE(review): presumably the sentence's surface text — confirm
  , sent_id :: !Text    -- ^ NOTE(review): presumably the sentence's identifier — confirm
  , tokens  :: [Token]  -- ^ the sentence's 'Token's (a lazy field, unlike the two above)
  } deriving (Show, Generic)
-- JSON decoding/encoding for 'Token' and 'Sentence'.
--
-- The instance bodies are empty on purpose: aeson then falls back to its
-- 'Generic'-based default methods, which is why both types derive 'Generic'.
-- NOTE(review): with aeson's default options the record field names are
-- used verbatim as JSON keys — confirm against the aeson version in use.
instance FromJSON Token
instance ToJSON Token
instance FromJSON Sentence
instance ToJSON Sentence
-- | Path of the JSON file that 'eespJSON' reads and 'main' decodes.
--
-- The path is relative, so it is resolved against the current working
-- directory of the process.
eespFile :: FilePath
eespFile = "sentences-with-p-eesp.json"
-- | Read the contents of 'eespFile' as a lazy 'B.ByteString'.
--
-- This is an 'IO' action, not a cached value: the file is opened every
-- time the action is run. No exception is handled here, so an I/O error
-- raised by 'B.readFile' (e.g. the file is missing) reaches the caller.
eespJSON :: IO B.ByteString
eespJSON = B.readFile eespFile
-- | Decode the contents of 'eespFile' as a list of 'Sentence's and print
-- the decoded value to stdout.
--
-- If decoding fails, aeson's error message is written to stderr and the
-- process exits with a failure status. Previously the message went to
-- stdout and the exit status was 0, so a calling script could neither
-- detect the failure nor tell the message apart from regular output.
main :: IO ()
main = do
  decoded <- eitherDecode <$> eespJSON
  case (decoded :: Either String [Sentence]) of
    Left err -> do
      -- Keep stdout for data only; signal the failure through the exit code.
      hPutStrLn stderr err
      exitFailure
    Right ps -> print ps
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment