Last active
August 10, 2020 23:02
-
-
Save arademaker/b88311adafd32e2bda3a0f2b1be5a9b4 to your computer and use it in GitHub Desktop.
merge two files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.List | |
import System.FilePath.Posix | |
-- Plan:
--   1. Read the files into lists of strings (assuming each file is an ordered list of pathnames).
--   2. Convert each list of pathnames into a list of objs.
--   3. Merge the two lists of objs.
--   4. Produce the output.
-- Alternative: use Data.MultiSet, see http://hackage.haskell.org/package/multiset-0.2.2/docs/Data-MultiSet.html
-- | Example pathname with a @.parse@ extension; presumably kept around for
-- experimenting with the functions below in GHCi.
line1 :: String
line1 = "ontonotes-release-5.0/data/files/data/english/annotations/bc/cctv/00/cctv_0000.parse"

-- | Example pathname with a @.gold_conll@ extension; it has the same base
-- name as 'line1' but lives in a different directory.
line2 :: String
line2 = "data/ontonotes/bc/cctv/00/cctv_0000.gold_conll"
-- | Split a string on every occurrence of a delimiter character.
--
-- The delimiter itself is dropped. Empty fields are preserved, so the result
-- always has exactly one more element than the number of delimiters in the
-- input; in particular the empty string yields @[""]@.
split :: Char -> String -> [String]
split delim = go
  where
    -- Peel off everything up to the next delimiter, then continue with
    -- whatever follows that delimiter (if there is one).
    go str =
      let (chunk, rest) = break (== delim) str
       in case rest of
            []          -> [chunk]
            (_ : after) -> chunk : go after
-- | A pathname broken into three string components.
--
-- Values are built by 'path2obj'; the 'Eq' and 'Ord' instances in this file
-- look only at the 'name' field.
data Obj = Obj
  { path      :: String  -- ^ directory component of the pathname
  , name      :: String  -- ^ base name; the key used for comparison
  , extension :: String  -- ^ extension component of the pathname
  }
  deriving (Show)
-- | Two 'Obj' values are equal exactly when their 'name' fields are equal;
-- 'path' and 'extension' are deliberately not inspected.
instance Eq Obj where
  (==) lhs rhs = name lhs == name rhs
-- | 'Obj' values are ordered by their 'name' field alone.
--
-- Only 'compare' is spelled out; the class defaults derive '<', '<=' and the
-- remaining methods from it, which gives the same name-based ordering.
instance Ord Obj where
  compare lhs rhs = name lhs `compare` name rhs
-- | Turn a pathname into an 'Obj' by taking it apart with the
-- "System.FilePath.Posix" helpers: 'takeDirectory' fills 'path',
-- 'takeBaseName' fills 'name' and 'takeExtension' fills 'extension'.
path2obj :: String -> Obj
path2obj pathname =
  Obj
    { path      = takeDirectory pathname
    , name      = takeBaseName pathname
    , extension = takeExtension pathname
    }
-- | Merge two lists of 'Obj' into aligned pairs, in the manner of a full
-- outer join keyed on the 'Ord' instance of 'Obj'.
--
-- The lists are walked in lockstep. When the two heads compare equal they
-- are emitted together as @(Just l, Just r)@; otherwise the smaller head is
-- emitted alone, with 'Nothing' on the other side, and only that list
-- advances. Once one list is exhausted, the rest of the other is emitted
-- unpaired.
--
-- Because only the heads are ever compared, both inputs need to be sorted in
-- ascending order for every matching pair to be found.
--
-- A single 'compare' drives the three-way decision, so the case analysis is
-- exhaustive by construction (no guard chain that could fall through).
join :: [Obj] -> [Obj] -> [(Maybe Obj, Maybe Obj)]
join [] rights = [(Nothing, Just r) | r <- rights]
join lefts [] = [(Just l, Nothing) | l <- lefts]
join (l : ls) (r : rs) =
  case compare l r of
    EQ -> (Just l, Just r) : join ls rs
    LT -> (Just l, Nothing) : join ls (r : rs)
    GT -> (Nothing, Just r) : join (l : ls) rs
-- | Render one joined pair as three columns separated by @sep@:
-- the name, the left-hand path and the right-hand path.
--
-- A side that is 'Nothing' is shown as @"-"@. When both sides are present
-- the name is taken from the left one. The @(Nothing, Nothing)@ pair is not
-- handled; 'join' never produces it.
print_pair :: String -> (Maybe Obj, Maybe Obj) -> String
print_pair sep pair = intercalate sep columns
  where
    columns = case pair of
      (Just l, Just r)  -> [name l, path l, path r]
      (Nothing, Just r) -> [name r, "-", path r]
      (Just l, Nothing) -> [name l, path l, "-"]
-- | Read the two pathname lists from the files @ont-list@ and
-- @propbank-list@ in the current directory (one pathname per line), join
-- them, and print one space-separated line per joined pair to stdout.
main :: IO ()
main = do
  s1 <- readFile "ont-list"
  s2 <- readFile "propbank-list"
  let r1 = map path2obj (lines s1)
      r2 = map path2obj (lines s2)
  -- mapM_ rather than mapM: the per-line results are all (), so there is no
  -- point in collecting them into a list that is immediately thrown away.
  mapM_ (putStrLn . print_pair " ") (join r1 r2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment