Skip to content

Instantly share code, notes, and snippets.

@Yengas
Last active April 4, 2025 08:30
Show Gist options
  • Save Yengas/609d2f76c175bfddba9ba9e39d6512a3 to your computer and use it in GitHub Desktop.
Save Yengas/609d2f76c175bfddba9ba9e39d6512a3 to your computer and use it in GitHub Desktop.
Creating Table of Contents by reading the folder and file structure of a git project

toc.py

This script walks the folder and file structure of your Git project and generates a Markdown table of contents. You can paste its output into your project's README, or let the script insert it between two marker lines for you.

Example folder structure:

.
├── Test
│   └── README.md
├── Test2
│   ├── README.md
│   └── SubTest
│       ├── Extra.md
│       └── README.md
└── Test3

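Example output (nested entries are indented with tabs; a directory is linked only when it contains a README.md):

- [Test](./Test)
- [Test2](./Test2)
	- [SubTest](./Test2/SubTest)
		- [Extra](./Test2/SubTest/Extra.md)
- Test3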

import argparse
import functools
import os
import sys
import urllib

try:  # Python 3
    from urllib.request import pathname2url
except ImportError:  # Python 2
    from urllib import pathname2url
# Sorts files by their name. Directories come first.
def sort_files(a, b):
    """Compare two paths so that directories sort before files.

    This is an old-style ``cmp`` comparator: it returns a negative, zero
    or positive number instead of a sort key.

    Args:
        a: First path, or None.
        b: Second path, or None.

    Returns:
        0 when both arguments are equal, -1 when ``a`` should come first
        and 1 when ``b`` should come first. None sorts before any path,
        directories sort before non-directories, and entries of the same
        kind are ordered by plain string comparison.
    """
    if a == b:
        return 0
    if a is None or b is None:
        return -1 if a is None else 1
    left, right = os.path.isdir(a), os.path.isdir(b)
    if left == right:
        # Same kind (both directories or both files): order by name.
        return -1 if a < b else 1
    return -1 if left else 1
# Creates a line of text for a directory entry.
def directory_line(file_name, full_path, level):
    """Build the TOC bullet for a directory.

    Args:
        file_name: Directory name shown as the bullet text.
        full_path: Path of the directory; used to look for a README.md
            and as the link target.
        level: Nesting depth; the line is indented with one tab per level.

    Returns:
        ``- name`` when the directory has no README.md, otherwise
        ``- [name](url)`` where ``url`` is ``full_path`` converted with
        ``pathname2url``.
    """
    indent = '\t' * level
    if os.path.isfile(os.path.join(full_path, 'README.md')):
        # Only directories with a README.md have something to link to.
        return '%s- [%s](%s)' % (indent, file_name, pathname2url(full_path))
    return indent + '- ' + file_name
# Creates a line of text for a file entry.
def file_line(file_name, full_path, level):
    """Build the TOC bullet for a single file.

    Args:
        file_name: File name; its extension is dropped for the link text.
        full_path: Path of the file, converted with ``pathname2url`` to
            form the link target.
        level: Nesting depth; the line is indented with one tab per level.

    Returns:
        A line of the form ``- [title](url)`` prefixed by ``level`` tabs.
    """
    title = os.path.splitext(file_name)[0]
    return ('\t' * level) + '- [%s](%s)' % (title, pathname2url(full_path))
# Walks a given directory to create a TOC out of it.
def walk_directory(path='.', exclude=('.git', '.idea'), level=0):
    """Recursively walk ``path`` and build the lines of a Markdown TOC.

    Args:
        path: Directory to walk.
        exclude: Collection of file/directory names to skip at every level.
        level: Current nesting depth, used for indentation of the lines.

    Returns:
        A list of TOC lines. Every non-excluded directory yields a line
        followed by the lines of its own contents; every ``.md`` file
        other than ``README.md`` yields a link line. Other files are
        ignored.
    """
    files = os.listdir(path)
    # sort_files is a cmp-style comparator working on full paths; adapt it
    # to a key function, since list.sort() no longer accepts a comparator
    # on Python 3.
    as_key = functools.cmp_to_key(sort_files)
    files.sort(key=lambda name: as_key(os.path.join(path, name)))

    result = []
    for file_name in files:
        # Skip the entry if it's in the exclude list.
        if file_name in exclude:
            continue
        full_path = os.path.join(path, file_name)
        if os.path.isdir(full_path):
            result.append(directory_line(file_name, full_path, level))
            result.extend(walk_directory(full_path, exclude, level + 1))
        elif file_name != 'README.md' and file_name.endswith('.md'):
            # README.md is represented by the link on its directory line.
            result.append(file_line(file_name, full_path, level))
    return result
def replace_toc(file_path, toc, toc_start, toc_end):
    """Return the contents of ``file_path`` with its TOC section replaced.

    Everything between the first ``toc_start`` marker and the next
    ``toc_end`` marker after it is replaced by ``toc`` surrounded by blank
    lines. Both markers are kept. The file itself is not modified.

    Args:
        file_path: Path of the file to read.
        toc: The generated table of contents text.
        toc_start: Marker string that opens the TOC section.
        toc_end: Marker string that closes the TOC section.

    Returns:
        The new file contents as a string.

    Raises:
        ValueError: If ``toc_start`` is missing, or ``toc_end`` does not
            occur after it. Slicing with the -1 that ``str.find`` returns
            would otherwise silently produce a corrupted document.
    """
    with open(file_path, 'r') as toc_file:
        content = toc_file.read()

    start = content.find(toc_start)
    if start == -1:
        raise ValueError(
            'TOC start marker %r not found in %s' % (toc_start, file_path))
    body_start = start + len(toc_start)

    # Only accept an end marker that follows the start marker.
    end = content.find(toc_end, body_start)
    if end == -1:
        raise ValueError(
            'TOC end marker %r not found after the start marker in %s'
            % (toc_end, file_path))

    return content[:body_start] + ("\n\n%s\n\n" % toc) + content[end:]
# Command-line entry point: build the TOC for --target and either print it
# or print the --readme file with the TOC inserted between the two markers.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--readme",
    help="Searches and replaces the lines between toc-start and toc-end in the given file and prints the output. If not given, the script just prints the TOC generated.")
parser.add_argument(
    "--target",
    help="Target folder to create TOC for.",
    default=".")
parser.add_argument(
    "--exclude",
    help="List of folder and file names to exclude.",
    default=['.git', '.idea'],
    type=str,
    nargs='+')
parser.add_argument(
    "--toc-start",
    help="Start of the TOC.",
    default="[//]: # (TOCSTART)")
parser.add_argument(
    "--toc-end",
    help="End of the TOC.",
    default="[//]: # (TOCEND)")
args = parser.parse_args()

result = "\n".join(walk_directory(args.target, args.exclude))
if args.readme is None:
    # No README given: emit just the generated TOC.
    sys.stdout.write(result)
else:
    # The README is only read; the merged text goes to stdout.
    sys.stdout.write(replace_toc(args.readme, result, args.toc_start, args.toc_end))
sys.stdout.flush()
@kyrylo-ushkalov-clearscale

Here are some Bash and Python scripts with properly URL-encoded file paths.

Bash:

#!/bin/bash
# Prints a Markdown table of contents for every *.md file below the current
# directory.
#
# Pipeline:
#   find  - lists all *.md paths under "." (each path starts with "./").
#   sort  - orders the paths so files of one directory are adjacent.
#   gawk  - splits each path on "/" after stripping the leading "./".
#           When the parent path differs from the previous line's, it prints
#           a plain "- <parent directory name>" bullet, indented with one tab
#           per path component above that directory. It then prints a
#           "- [<file name without .md>](<path>)" bullet one tab deeper.
#           gensub() is used, so gawk is required rather than plain awk.
#   uniq  - drops adjacent duplicate output lines.
#
# NOTE(review): the link target is the path exactly as printed by find; no
# percent-encoding step is visible here - confirm paths with spaces work.
# NOTE(review): for a top-level file that follows a nested one, NF is 1, so
# $(NF-1) is $0 and a plain bullet with the file path is printed before the
# link - confirm this is intended.
find . -name "*.md" | sort | gawk '
BEGIN {FS="/"}
sub(/^\.\//,"") { 
  path=""
  for (i=1; i<NF; i++) path=path"/"$i
  if (path != old) {
    for (i=2; i<NF; i++) printf "\t"
    print "- " $(NF-1)
  } 
  file = $NF
  name = gensub(/\.md$/, "", "g", file)
  fullpath = gensub(/^\.\//, "", "g", $0)
  for (i=2; i<=NF; i++) printf "\t"
  print "- [" name "](" fullpath ")"
  old = path
}' | uniq

Python:

import os
import urllib.parse

def generate_toc(root_dir=".", exclude_dirs={'.git', '.idea'}, level=0):
    """Return the Markdown TOC lines for ``root_dir`` and everything below it.

    Directories are listed before files and names are ordered
    case-insensitively. Each directory becomes a plain bullet followed by
    its own entries one level deeper (four spaces per level). Each ``.md``
    file other than a README becomes a link bullet whose target is the
    URL-quoted path relative to the current working directory. Names found
    in ``exclude_dirs`` are skipped; a directory that cannot be listed
    contributes no entries.
    """
    def dirs_first(name):
        # False sorts before True, so directories come ahead of files.
        is_file = not os.path.isdir(os.path.join(root_dir, name))
        return (is_file, name.lower())

    try:
        names = os.listdir(root_dir)
        names.sort(key=dirs_first)
    except PermissionError:
        return []

    indent = "    " * level
    lines = []
    for name in names:
        if name in exclude_dirs:
            continue
        path = os.path.join(root_dir, name)
        if os.path.isdir(path):
            lines.append(indent + "- " + name)
            lines.extend(generate_toc(path, exclude_dirs, level + 1))
            continue
        if not name.endswith(".md") or name.lower() == "readme.md":
            continue
        title, _ext = os.path.splitext(name)
        link = urllib.parse.quote(os.path.relpath(path))
        lines.append("%s- [%s](%s)" % (indent, title, link))
    return lines

# When run as a script, print the TOC of the current directory, one entry
# per line.
if __name__ == "__main__":
    print("\n".join(generate_toc()))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment