Last active
May 3, 2020 21:34
-
-
Save bluegraybox/7985217 to your computer and use it in GitHub Desktop.
script that parses indented text into a tree structure
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Data.Char | |
import qualified Data.String as S | |
import System.IO | |
import System.Environment | |
-- Parses indented text into a tree structure, where each node is represented as a (content, children) tuple | |
-- Example invocation: | |
-- echo -e "a\n b\n c\n d\n d2\n e\nf\n g\n h\n i\n j\n k\nl\n" | ./parse_indents | |
-- a {b, c {d, d2}, e}, f {g {h {i}}, j, k}, l | |
-- Parsing functions | |
getIndent line = | |
let (spaces, content) = span (\c -> c == ' ') line | |
in (length(spaces), content) | |
data Node = Node { content :: String, children :: [Node] } | |
parse list = parseNodes $ map getIndent list | |
parseNodes :: [(Int, String)] -> [Node] | |
parseNodes [] = [] | |
parseNodes ((_indent, ""):rest) = parseNodes rest | |
parseNodes ((indent, content):rest) = | |
-- the children of this node are indented further | |
let (childLines, siblingLines) = span (\(i,c) -> i > indent) rest | |
children = parseNodes childLines | |
siblings = parseNodes siblingLines | |
in (Node content children:siblings) | |
-- Display functions | |
join :: String -> [String] -> String | |
join d [s] = s | |
join d (h:t) = h ++ d ++ join d t | |
format :: String -> [Node] -> [String] | |
format indent [] = [] | |
format indent (node:nodes) = | |
let nodeLine = indent ++ (content node) | |
childLines = format (" " ++ indent) (children node) | |
sibLines = format indent nodes | |
in concat [[nodeLine], childLines, sibLines] | |
terseFmt :: [Node] -> String | |
terseFmt [] = "" | |
terseFmt (node:nodes) = | |
let nodeLine = content node | |
childLines = case terseFmt (children node) of | |
"" -> "" | |
c -> " {" ++ c ++ "}" | |
sibLines = case terseFmt nodes of | |
"" -> "" | |
c -> ", " ++ c | |
in nodeLine ++ childLines ++ sibLines | |
-- Entry point | |
main = do | |
content <- getContents | |
let tree = parse $ S.lines content | |
putStrLn ("Top nodes: " ++ (show (length tree))) | |
putStrLn (unlines $ format "" tree) | |
putStrLn $ terseFmt tree |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment