Created
July 7, 2014 23:06
-
-
Save benkolera/da7d30d8bf6ddee5a994 to your computer and use it in GitHub Desktop.
A thing written at work to do some basic xml & csv munging.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| {-# LANGUAGE OverloadedLists, OverloadedStrings, TupleSections #-} | |
| import Prelude hiding (readFile) | |
| import Control.Applicative ((<$>),(<*>)) | |
| import Control.Error (Script,left,headMay,note,runScript,scriptIO) | |
| import qualified Data.ByteString.Lazy as B | |
| import Data.Char (isDigit) | |
| import Data.Csv (FromNamedRecord,ToNamedRecord,decodeByName,encodeByName | |
| ,namedRecord,parseNamedRecord,toNamedRecord,(.=),(.:)) | |
| import Data.Default (def) | |
| import Data.Foldable (fold) | |
| import Data.Map (Map) | |
| import qualified Data.Map as M | |
| import Data.Maybe (fromMaybe,mapMaybe) | |
| import Data.Text (Text,unpack) | |
| import qualified Data.Text as T | |
| import Data.Traversable (traverse) | |
| import Data.Vector (Vector,toList) | |
| import System.IO (hPutStrLn,stderr) | |
| import qualified Text.XML as X | |
| import Text.XML (Node) | |
| import Text.XML.Cursor (Cursor,fromDocument,($//),($/),(&|),(&/),content | |
| ,laxElement,laxAttribute) | |
| data ItemRow = ItemRow | |
| { parent :: Text | |
| , name :: Text | |
| , cogsAccount :: Maybe Text | |
| , incomeAccount :: Maybe Text | |
| } deriving Show | |
| instance FromNamedRecord ItemRow where | |
| parseNamedRecord m = ItemRow | |
| <$> m .: "Parent" | |
| <*> m .: "Name" | |
| <*> m .: "Expense/COGS Account" | |
| <*> m .: "Income Account" | |
| instance ToNamedRecord ItemRow where | |
| toNamedRecord (ItemRow p n e i) = namedRecord | |
| [ "Parent" .= p | |
| , "Name" .= n | |
| , "Expense/COGS Account" .= e | |
| , "Income Account" .= i | |
| ] | |
| data AccountIndex = AccountIndex | |
| { idToName :: Map Text Text | |
| , idToParent :: Map Text Text | |
| } deriving Show | |
| main :: IO () | |
| main = runScript $ do | |
| x <- loadXml | |
| c <- loadCsv | |
| updatedCsv <- warnFailedUpdates . fmap (updateCsvAccounts x) $ c | |
| writeCsv updatedCsv | |
| warnFailedUpdates :: [Either (String,ItemRow) ItemRow] -> Script [ItemRow] | |
| warnFailedUpdates = fmap fold . traverse warnFailed | |
| where | |
| warnFailed :: Either (String,ItemRow) ItemRow -> Script [ItemRow] | |
| warnFailed (Left (err,ir)) = scriptIO $ do | |
| hPutStrLn stderr $ "Ignoring row " ++ show ir ++ " because of " ++ err | |
| return [] | |
| warnFailed (Right ir) = return [ir] | |
| loadXml :: Script AccountIndex | |
| loadXml = do | |
| c <- fromDocument <$> scriptIO (X.readFile def "accounts.xml") | |
| let recordCurs = c $// laxElement "record" | |
| return $ AccountIndex | |
| (M.fromList . fmap recordCurToNamePair $ recordCurs) | |
| (M.fromList . mapMaybe recordCurToParentPair $ recordCurs) | |
| recordCurToNamePair :: Cursor -> (Text,Text) | |
| recordCurToNamePair c = (num,name) | |
| where | |
| num = fold (c $/ laxElement "acctNumber" &/ content) | |
| name = fold (c $/ laxElement "acctName" &/ content) | |
| recordCurToParentPair :: Cursor -> Maybe (Text,Text) | |
| recordCurToParentPair c = (num,) <$> pId | |
| where | |
| num = fold (c $/ laxElement "acctNumber" &/ content) | |
| pId :: Maybe Text | |
| pId = fmap (T.takeWhile isDigit) . headMay $ | |
| c $/ laxElement "parent" &/ laxElement "name" &/ content | |
| loadCsv :: Script [ItemRow] | |
| loadCsv = do | |
| ce <- decodeByName <$> scriptIO (B.readFile "items.csv") | |
| case ce of | |
| Left err -> left $ "Csv Parse Failed: " ++ err | |
| Right c -> return . toList . snd $ c | |
| updateCsvAccounts :: AccountIndex -> ItemRow -> Either (String,ItemRow) ItemRow | |
| updateCsvAccounts idx row = do | |
| newCogs <- updateAccount . cogsAccount $ row | |
| newIncome <- updateAccount . incomeAccount $ row | |
| return $ row { cogsAccount = newCogs , incomeAccount = newIncome } | |
| where | |
| -- This bit is a bit odd because we only want to error if there was | |
| -- an id in the column. | |
| updateAccount Nothing = return Nothing | |
| updateAccount (Just id) = do | |
| name <- note ("Acct not found:" ++ unpack id,row) (lookupName idx id) | |
| let fullName = T.intercalate " : " (lookupParentNames idx id ++ [name]) | |
| return . Just $ T.concat [id," ",fullName] | |
| lookupParentNames :: AccountIndex -> Text -> [Text] | |
| lookupParentNames idx id = fromMaybe [] $ do | |
| pId <- lookupParent idx id | |
| pName <- lookupName idx pId | |
| return (pName : lookupParentNames idx pId) | |
| lookupName :: AccountIndex -> Text -> Maybe Text | |
| lookupName = flip M.lookup . idToName | |
| lookupParent :: AccountIndex -> Text -> Maybe Text | |
| lookupParent = flip M.lookup . idToParent | |
| writeCsv :: [ItemRow] -> Script () | |
| writeCsv = scriptIO | |
| . B.writeFile "items.fixed.csv" | |
| . encodeByName ["Parent","Name","Expense/COGS Account","Income Account"] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment