Skip to content

Instantly share code, notes, and snippets.

@clane9
Last active January 7, 2025 19:37
Show Gist options
  • Save clane9/8ff0b52f9ecf88a90fc350d448a80a23 to your computer and use it in GitHub Desktop.
Save clane9/8ff0b52f9ecf88a90fc350d448a80a23 to your computer and use it in GitHub Desktop.
Parse the output of the linux `tree` command to json.
"""
Parse the output of the Linux `tree` command into a flat list of file paths.

Example::

    python tree_to_list.py tree.txt
"""
import argparse
import logging
import re
from dataclasses import dataclass, field
from functools import lru_cache
from pathlib import Path
from typing import Optional, List, Tuple
# Configure the root logger to emit records at INFO level and above.
logging.basicConfig(level=logging.INFO)
@dataclass
class Node:
    """
    A node of a directory tree.

    Each node holds references to its parent and children, the path of the
    entry relative to its parent, and its nesting depth.
    """

    # Path of this entry; `full_path` joins it onto the parent's full path.
    path: Optional[Path] = None
    # Enclosing node, or None for the root of the tree.
    parent: Optional["Node"] = None
    # Child nodes, in the order they were appended.
    children: List["Node"] = field(default_factory=list)
    # Nesting level supplied by whoever builds the tree (0 by default).
    depth: int = 0

    def to_dict(self):
        """
        Convert the tree under the current node to a dict mapping paths to
        lists of children.

        A node without children is represented by its path as a plain string;
        a node with children becomes a single-key dict
        ``{str(path): [child.to_dict(), ...]}``.
        """
        if not self.children:
            return str(self.path)
        return {str(self.path): [child.to_dict() for child in self.children]}

    # Not memoized on purpose: a bare `lru_cache()` call placed here is a no-op
    # expression statement, and applying it as a decorator would fail at call
    # time because a default `@dataclass` (eq=True, not frozen) is unhashable.
    def full_path(self):
        """
        Get the full path for the node.

        The root node (no parent) returns its own `path` unchanged; every
        other node returns its parent's full path joined with its own `path`.
        """
        if self.parent is None:
            return self.path
        return self.parent.full_path() / self.path
def parse_tree_output(tree_path: str) -> Tuple[Node, List[Path]]:
    """
    Parse the output of the linux `tree` command stored in `tree_path`.

    The first line of the file is taken to be the root directory. Every
    following line is split into its tree-drawing prefix (up to and including
    the first ``"── "``) and the entry name; the nesting depth is the prefix
    length divided by four. Blank lines are skipped silently; any other line
    that does not match is logged as a warning and skipped.

    Args:
        tree_path: Path to a text file containing `tree` output.

    Returns:
        A tuple ``(tree, paths)`` where `tree` is the root `Node` and `paths`
        holds the full path of every parsed entry, in file order.
    """
    # Split the tree formatting prefix and the path for lines like:
    # │ │ │ ├── 1.51.51.5_rotated.4dfp.ifh
    # TODO: any corner cases missed with this oversimple regex?
    entry_pattern = re.compile(r"(.*?── )(.*)")

    paths = []
    # The box-drawing characters are non-ASCII, so decode explicitly as UTF-8
    # rather than relying on the platform's default encoding.
    with open(tree_path, encoding="utf-8") as f:
        # Assume the root directory is on the first line
        root = f.readline().strip()
        tree = parent = node = Node(path=Path(root))

        # Parse lines one by one, streaming instead of loading the whole file
        for line in f:
            if not line.strip():
                # `tree` prints an empty line before its summary; not an error
                continue

            match = entry_pattern.match(line)
            if match is None:
                logging.warning("Line %r didn't match", line)
                continue

            prefix, path = match.groups()
            path = Path(path.strip())
            # Each nesting level contributes four characters to the prefix
            depth = len(prefix) // 4

            # Determine nesting level relative to previous node
            if depth > node.depth:
                # Descended: the previous entry is the enclosing directory
                parent = node
            elif depth < node.depth:
                # Ascended: climb one ancestor per level that was closed
                for _ in range(depth, node.depth):
                    parent = parent.parent
            # Same depth: sibling of the previous entry, parent is unchanged

            # Append to tree at the appropriate level
            node = Node(path, parent=parent, depth=depth)
            parent.children.append(node)

            # Append full path to list
            paths.append(node.full_path())
    return tree, paths
if __name__ == "__main__":
    # Command-line entry point: take the tree dump as the only positional
    # argument and print one full path per line.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "tree_text", type=str, help="path to text file containing tree output"
    )
    options = cli.parse_args()

    # Only the flat path list is printed; the parsed tree itself is not used.
    _root_node, listed_paths = parse_tree_output(options.tree_text)
    print("\n".join(map(str, listed_paths)))
@Pomax
Copy link

Pomax commented Jan 7, 2025

Note that tree 1.7 and later have JSON output built in: `tree -J` generates output that can be loaded directly by anything that knows how to parse JSON.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment