Created
October 31, 2020 20:59
-
-
Save arademaker/e7ae2e1ef5225807a3e627caa61481da to your computer and use it in GitHub Desktop.
Merge.hs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE DeriveGeneric, OverloadedStrings #-} | |
import Data.List
import GHC.Generics
import System.Environment
import System.Exit (exitFailure)
import System.IO

import Data.Aeson
import qualified Data.ByteString.Lazy as B
import Data.Text
import System.FilePath.Posix
{-
Read two JSON files; align the sentences; align the tokens using their
spans (b,e); and find the root of both, in order to list the text and
roots, indicating match vs. mismatch.
-}
data Token = Token | |
{ id :: Int | |
, form :: !Text | |
, lemma :: !Text | |
, upostag :: !Text | |
, xpostag :: !Text | |
, feats :: !Text | |
, head :: Int | |
, deprel :: !Text | |
, deps :: !Text | |
, misc :: !Text | |
, cfrom :: Int | |
, cto :: Int | |
, lineno :: Int | |
} deriving (Show, Generic) | |
data Sentence = Sentence | |
{ text :: !Text | |
, sent_id :: !Text | |
, tokens :: [Token] | |
} deriving (Show, Generic) | |
instance FromJSON Token | |
instance ToJSON Token | |
instance FromJSON Sentence | |
instance ToJSON Sentence | |
eespFile :: FilePath | |
eespFile = "sentences-with-p-eesp.json" | |
eespJSON :: IO B.ByteString | |
eespJSON = B.readFile eespFile | |
main :: IO () | |
main = do | |
d <- (eitherDecode <$> eespJSON) :: IO (Either String [Sentence]) | |
case d of | |
Left err -> putStrLn err | |
Right ps -> print ps | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment