Skip to content

Instantly share code, notes, and snippets.

@clane9
Last active October 28, 2024 09:42
Show Gist options
  • Save clane9/8ff0b52f9ecf88a90fc350d448a80a23 to your computer and use it in GitHub Desktop.
Save clane9/8ff0b52f9ecf88a90fc350d448a80a23 to your computer and use it in GitHub Desktop.
Parse the output of the linux `tree` command to json.
"""
Parse the output of the Linux `tree` command into a flat list of paths.
Example::
python tree_to_list.py tree.txt
"""
import argparse
import logging
import re
from dataclasses import dataclass, field
from functools import lru_cache
from pathlib import Path
from typing import Optional, List, Tuple
# Set up logging at import time with an INFO threshold, so the warnings this
# module logs for unparseable lines are emitted.
logging.basicConfig(level=logging.INFO)
@dataclass
class Node:
    """
    A node of a directory tree with references to parent and children as well as
    the path and depth.
    """

    # Path component of this node, joined onto the parent's full path.
    path: Optional[Path] = None
    # Enclosing node, or None when this node has no parent.
    parent: Optional["Node"] = None
    # Nodes attached directly under this one, in insertion order.
    children: List["Node"] = field(default_factory=list)
    # Nesting level of the node; defaults to 0.
    depth: int = 0

    def to_dict(self):
        """
        Convert the tree under the current node to a dict mapping paths to lists
        of children.

        A node without children is represented by its path as a plain string;
        a node with children becomes a one-entry dict ``{path: [children...]}``
        built recursively.
        """
        if not self.children:
            return str(self.path)
        return {str(self.path): [child.to_dict() for child in self.children]}

    # NOTE: full_path is deliberately not memoised. The dataclass-generated
    # __eq__ makes Node unhashable, so functools.lru_cache could not key on
    # `self`, and nodes are mutable so a cached value could go stale.
    def full_path(self):
        """
        Get the full path for the node.

        The result is the node's own path appended to the full path of its
        parent, recursively up to the node that has no parent. Components
        that are unset (``None``) are skipped; if no component is set at all
        the result is ``None``.
        """
        if self.parent is None:
            return self.path
        parent_path = self.parent.full_path()
        if parent_path is None:
            # Nothing above contributes a component (e.g. a default root).
            return self.path
        if self.path is None:
            return parent_path
        return parent_path / self.path
def parse_tree_output(tree_path: str) -> Tuple[Node, List[Path]]:
    """
    Parse the output of the linux `tree` command stored in `tree_path` and
    return a `Node` representing the parsed tree and a list of paths.

    The first line of the file is taken as the root directory. Every following
    line is split into its tree-drawing prefix and the entry name; the nesting
    depth is derived from the prefix length (four characters per level). Both
    the default box-drawing output (``├── name``) and ``--charset=ascii``
    output (``|-- name``) are recognised.

    Blank lines and the trailing ``N directories, M files`` report are skipped
    silently. Any other line that cannot be parsed is logged as a warning and
    ignored. The text after the prefix is used verbatim as the entry name (no
    special handling of e.g. ``link -> target``).

    Args:
        tree_path: Path to a UTF-8 text file containing `tree` output.

    Returns:
        A tuple ``(tree, paths)`` where ``tree`` is the root `Node` and
        ``paths`` lists the full path of every parsed entry in file order.
    """
    # Prefix = everything up to and including the first branch marker, e.g.
    # "│   │   ├── " or "|   |   `-- "; the remainder is the entry name.
    # TODO: any corner cases missed with this oversimple regex?
    entry_re = re.compile(r"(.*?(?:──|--) )(.*)")
    # Report line that `tree` prints at the end, e.g. "3 directories, 5 files".
    summary_re = re.compile(r"\d+ director(?:y|ies)(?:, \d+ files?)?")

    paths = []
    # `tree` draws branches with non-ASCII characters; read as UTF-8 explicitly
    # instead of relying on the platform's locale encoding.
    with open(tree_path, encoding="utf-8") as f:
        # Assume the root directory is on the first line
        root = f.readline().strip()
        tree = parent = node = Node(path=Path(root))

        # Parse lines one by one, streaming rather than loading the whole file
        for line in f:
            stripped = line.strip()
            if not stripped or summary_re.fullmatch(stripped):
                continue

            match = entry_re.match(line)
            if match is None:
                logging.warning("Line %r didn't match", line)
                continue

            prefix, name = match.groups()
            entry = Path(name.strip())
            depth = len(prefix) // 4

            # Determine nesting level relative to previous node
            if depth > node.depth:
                parent = node
            elif depth < node.depth:
                for _ in range(node.depth - depth):
                    if parent.parent is None:
                        # Malformed indentation: never climb above the root.
                        break
                    parent = parent.parent

            # Append to tree at the appropriate level
            node = Node(entry, parent=parent, depth=depth)
            parent.children.append(node)

            # Append full path to list
            paths.append(node.full_path())

    return tree, paths
if __name__ == "__main__":
    # Command-line entry point: read the tree listing named by the single
    # positional argument and print one full path per line.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "tree_text", type=str, help="path to text file containing tree output"
    )
    options = cli.parse_args()

    tree, paths = parse_tree_output(options.tree_text)
    listing = "\n".join(str(entry) for entry in paths)
    print(listing)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment