Skip to content

Instantly share code, notes, and snippets.

@Yengas
Last active April 4, 2025 08:30
Show Gist options
  • Save Yengas/609d2f76c175bfddba9ba9e39d6512a3 to your computer and use it in GitHub Desktop.
Save Yengas/609d2f76c175bfddba9ba9e39d6512a3 to your computer and use it in GitHub Desktop.
Creating Table of Contents by reading the folder and file structure of a git project

toc.py

This script reads the structure of your git project to create a markdown table of contents. You can use the output of this script to add a TOC to the README of your project.

Example folder structure:

.
├── Test
│   └── README.md
├── Test2
│   ├── README.md
│   └── SubTest
│       ├── Extra.md
│       └── README.md
└── Test3

Example output:

import os
import urllib
import sys
import argparse
# Sorts files by their name. Directories come first.
def sort_files(a, b):
    """Compare two paths cmp-style so that directories sort before files.

    Args:
        a: First path (or None).
        b: Second path (or None).

    Returns:
        0 when both arguments are equal, a negative number when ``a`` should
        come first and a positive number when ``b`` should come first.
        ``None`` sorts before any path; directories sort before
        non-directories; entries of the same kind are ordered by name.
    """
    if a == b:
        return 0
    # Identity checks: None is a singleton and must not be compared with ==.
    if a is None or b is None:
        return -1 if a is None else 1
    left, right = os.path.isdir(a), os.path.isdir(b)
    if left == right:
        return -1 if a < b else 1
    return -1 if left else 1
# Creates a line of text for a directory entry.
def directory_line(file_name, full_path, level):
    """Build the TOC list item for a directory.

    Args:
        file_name: Name of the directory, used as the visible label.
        full_path: Path of the directory, used as the link target.
        level: Nesting depth; one tab of indentation is emitted per level.

    Returns:
        A markdown list item. The label is a link to ``full_path`` only when
        the directory contains a ``README.md``; otherwise it is plain text.
    """
    # pathname2url lives in urllib.request on Python 3 and in urllib on
    # Python 2; import it locally so the function works on both.
    try:
        from urllib.request import pathname2url
    except ImportError:
        from urllib import pathname2url

    indent = '\t' * level
    if not os.path.isfile(os.path.join(full_path, 'README.md')):
        return indent + '- ' + file_name
    return indent + '- ' + '[%s](%s)' % (file_name, pathname2url(full_path))
# Creates a line of text for a file entry.
def file_line(file_name, full_path, level):
    """Build the TOC list item for a markdown file.

    Args:
        file_name: File name including its extension; the extension is
            stripped to form the visible label.
        full_path: Path of the file, used (URL-quoted) as the link target.
        level: Nesting depth; one tab of indentation is emitted per level.

    Returns:
        A markdown list item of the form ``- [label](url)``.
    """
    # pathname2url lives in urllib.request on Python 3 and in urllib on
    # Python 2; import it locally so the function works on both.
    try:
        from urllib.request import pathname2url
    except ImportError:
        from urllib import pathname2url

    label = os.path.splitext(file_name)[0]
    return ('\t' * level) + '- [%s](%s)' % (label, pathname2url(full_path))
# Walks a given directory to create a TOC out of it.
def walk_directory(path='.', exclude=('.git', '.idea'), level=0):
    """Recursively build the TOC lines for ``path``.

    Args:
        path: Directory to walk.
        exclude: Entry names (files or directories) to skip entirely.
        level: Nesting depth of ``path``; passed on to the line builders and
            incremented for every sub-directory.

    Returns:
        A list of markdown list items. Within each directory, sub-directories
        come first and files second, each group ordered by name. Every
        non-excluded directory gets a line; files get a line only when they
        end in ``.md`` and are not named ``README.md``.
    """
    # list.sort() only accepts a cmp function on Python 2 (Python 3 raises
    # "TypeError: must use keyword argument for key function"), so order the
    # entries with an equivalent key instead: directories first, then by name.
    files = sorted(
        os.listdir(path),
        key=lambda name: (not os.path.isdir(os.path.join(path, name)), name)
    )
    result = []
    for file_name in files:
        # Skip the entry if it is in the exclude list.
        if file_name in exclude:
            continue
        full_path = os.path.join(path, file_name)
        if os.path.isdir(full_path):
            result.append(directory_line(file_name, full_path, level))
            result.extend(walk_directory(full_path, exclude, level + 1))
        elif file_name != 'README.md' and file_name.endswith('.md'):
            result.append(file_line(file_name, full_path, level))
    return result
def replace_toc(file_path, toc, toc_start, toc_end):
    """Return the contents of ``file_path`` with the TOC section replaced.

    Everything between the first ``toc_start`` marker and the next
    ``toc_end`` marker after it is replaced by ``toc`` surrounded by blank
    lines. Both markers are kept. The file itself is not modified.

    Args:
        file_path: File to read.
        toc: Generated table of contents text.
        toc_start: Marker that opens the TOC section.
        toc_end: Marker that closes the TOC section.

    Returns:
        The new file contents as a string.

    Raises:
        ValueError: If either marker cannot be found. Previously the -1
            returned by ``str.find`` was used as a slice index, which
            silently produced corrupted output.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(file_path, 'r') as handle:
        toc_file = handle.read()

    start = toc_file.find(toc_start)
    if start == -1:
        raise ValueError("TOC start marker %r not found in %s" % (toc_start, file_path))
    section_start = start + len(toc_start)

    # Look for the end marker only after the start marker so that an earlier
    # occurrence cannot produce an inverted range.
    end = toc_file.find(toc_end, section_start)
    if end == -1:
        raise ValueError("TOC end marker %r not found in %s" % (toc_end, file_path))

    return toc_file[:section_start] + ("\n\n%s\n\n" % toc) + toc_file[end:]
# Command-line entry point: build the TOC for --target and either print it
# as-is or print the --readme file with its TOC section replaced.
parser = argparse.ArgumentParser()
parser.add_argument("--readme", help="Searches and replaces the lines between toc-start and toc-end in the given file and prints the output. If not given, the script just prints the TOC generated.")
parser.add_argument("--target", help="Target folder to create TOC for.", default=".")
parser.add_argument("--exclude", help="List of folder and file names to exclude.", default=['.git', '.idea'], type=str, nargs='+')
parser.add_argument("--toc-start", help="Start of the TOC.", default="[//]: # (TOCSTART)")
parser.add_argument("--toc-end", help="End of the TOC.", default="[//]: # (TOCEND)")
args = parser.parse_args()

result = "\n".join(walk_directory(args.target, args.exclude))

if args.readme is None:
    # No README given: just emit the generated TOC.
    sys.stdout.write(result)
else:
    # The README is only read; the merged text goes to stdout.
    sys.stdout.write(replace_toc(args.readme, result, args.toc_start, args.toc_end))
sys.stdout.flush()
@truthcures
Copy link

Traceback (most recent call last):
File "./Scripts/toc.py", line 65, in <module>
result = "\n".join(walk_directory(args.target, args.exclude));
File "./Scripts/toc.py", line 38, in walk_directory
files.sort(lambda x, y : sort_files(os.path.join(path, x), os.path.join(path, y)))

TypeError: must use keyword argument for key function

@Baarsgaard
Copy link

Baarsgaard commented Feb 3, 2022

Could not get this working as is, so made a few changes.
Works with python 3.8

Changes:

- import urllib
+ from urllib import request

- return ('\t' * level) + '- ' + (file_name if not has_readme else '[%s](%s)' % (file_name, urllib.pathname2url(full_path)))
+ return ('\t' * level) + '- ' + (file_name if not has_readme else '[%s](%s)' % (file_name, request.pathname2url(full_path)))
- return ('\t' * level) + '- [%s](%s)' % (os.path.splitext(file_name)[0], urllib.pathname2url(full_path))
+ file_root = os.path.splitext(file_name)[0]
+ return ('\t' * level) + '- [%s](%s)' % (file_root, request.pathname2url(file_root))
- result, files = [], os.listdir(path)
+ # Dir list
+ dirlist = sorted([x for x in os.listdir(path) if os.path.isdir(os.path.join(path, x))])
+ # File list
+ filelist = sorted([x for x in os.listdir(path) if not os.path.isdir(os.path.join(path, x))])
+ 
+ result=[]
+ # Make sure directories are listed before files (personal Preference)
+ for file_name in dirlist + filelist:

(And I removed some extra semicolons.)

Working example
import os
from urllib import request
import sys
import argparse

# Sorts files by their name. Directories come first.
def sort_files(a, b):
    """Three-way comparison of two paths that puts directories first.

    Args:
        a: Left-hand path, or None.
        b: Right-hand path, or None.

    Returns:
        0 if the arguments are equal, -1 if ``a`` sorts first, 1 if ``b``
        sorts first. None sorts before everything, directories sort before
        non-directories, and entries of the same kind are ordered by their
        path string.
    """
    if a == b:
        return 0
    # None is a singleton: test it by identity rather than with ==.
    if a is None:
        return -1
    if b is None:
        return 1

    a_is_dir, b_is_dir = os.path.isdir(a), os.path.isdir(b)
    if a_is_dir != b_is_dir:
        return -1 if a_is_dir else 1
    return -1 if a < b else 1


# Creates a line of text for a directory entry.
def directory_line(file_name, full_path, level):
    """Render the TOC bullet for a directory.

    The bullet is indented with one tab per ``level``. The directory name is
    turned into a link to ``full_path`` only if that directory holds a
    ``README.md``; otherwise the bare name is used.
    """
    indent = '\t' * level
    readme_path = os.path.join(full_path, 'README.md')
    if os.path.isfile(readme_path):
        label = '[%s](%s)' % (file_name, request.pathname2url(full_path))
    else:
        label = file_name
    return indent + '- ' + label

# Creates a line of text for a file entry.
def file_line(file_name, full_path, level):
    """Render the TOC bullet for a markdown file.

    Args:
        file_name: File name including extension; the extension is dropped
            to form the visible label.
        full_path: Path of the file; URL-quoted and used as the link target.
        level: Nesting depth; one tab of indentation is emitted per level.

    Returns:
        A markdown list item of the form ``- [label](url)``.
    """
    file_root = os.path.splitext(file_name)[0]
    # Link to the file's actual path. Using only the bare stem dropped both
    # the directory and the ".md" extension, so links to nested files could
    # not resolve from the README.
    return ('\t' * level) + '- [%s](%s)' % (file_root, request.pathname2url(full_path))

# Walks a given directory to create a TOC out of it.
def walk_directory(path='.', exclude=('.git', '.idea'), level=0):
    """Recursively collect the TOC lines for ``path``.

    Args:
        path: Directory to walk.
        exclude: Entry names (files or directories) to skip entirely.
        level: Nesting depth of ``path``; incremented for each sub-directory.

    Returns:
        A list of markdown list items. Sub-directories are listed before
        files, each group sorted by name. Every non-excluded directory gets a
        line; files get a line only when they end in ``.md`` and are not
        named ``README.md``.
    """
    # List the directory once and classify each entry once. Sorting on
    # (is-not-a-directory, name) gives "sorted directories, then sorted files".
    entries = sorted(
        (not os.path.isdir(os.path.join(path, name)), name)
        for name in os.listdir(path)
    )

    result = []
    for is_file, file_name in entries:
        # Skip the entry if it is in the exclude list.
        if file_name in exclude:
            continue
        full_path = os.path.join(path, file_name)
        if not is_file:
            result.append(directory_line(file_name, full_path, level))
            result.extend(walk_directory(full_path, exclude, level + 1))
        elif file_name != 'README.md' and file_name.endswith('.md'):
            result.append(file_line(file_name, full_path, level))
    return result

def replace_toc(file_path, toc, toc_start, toc_end):
    """Splice ``toc`` into the text of ``file_path`` between two markers.

    The text between the first ``toc_start`` and the following ``toc_end``
    is replaced by ``toc`` padded with a blank line on each side. The
    markers themselves are preserved and the file on disk is left untouched.

    Args:
        file_path: File whose contents are read.
        toc: Table of contents text to insert.
        toc_start: Marker opening the TOC section.
        toc_end: Marker closing the TOC section.

    Returns:
        The resulting text.

    Raises:
        ValueError: If a marker is missing. ``str.find`` returns -1 in that
            case, which would otherwise be used as a slice index and yield
            silently corrupted output.
    """
    # Use a context manager so the file handle is closed promptly.
    with open(file_path, 'r') as readme:
        toc_file = readme.read()

    start = toc_file.find(toc_start)
    if start == -1:
        raise ValueError(f"TOC start marker {toc_start!r} not found in {file_path}")
    insert_at = start + len(toc_start)

    # Search for the end marker after the start marker only, so a stray
    # earlier occurrence cannot invert the range.
    end = toc_file.find(toc_end, insert_at)
    if end == -1:
        raise ValueError(f"TOC end marker {toc_end!r} not found in {file_path}")

    return toc_file[:insert_at] + ("\n\n%s\n\n" % toc) + toc_file[end:]



# Command-line interface: generate the TOC for --target, then print either
# the TOC alone or the --readme contents with the TOC section swapped in.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--readme",
    help="Searches and replaces the lines between toc-start and toc-end in the given file and prints the output. If not given, the script just prints the TOC generated.",
)
parser.add_argument(
    "--target",
    help="Target folder to create TOC for.",
    default=".",
)
parser.add_argument(
    "--exclude",
    help="List of folder and file names to exclude.",
    default=['.git', '.idea'],
    type=str,
    nargs='+',
)
parser.add_argument(
    "--toc-start",
    help="Start of the TOC.",
    default="[//]: # (TOCSTART)",
)
parser.add_argument(
    "--toc-end",
    help="End of the TOC.",
    default="[//]: # (TOCEND)",
)
args = parser.parse_args()

result = "\n".join(walk_directory(args.target, args.exclude))

if args.readme is not None:
    # A README was given: print its text with the TOC section replaced.
    sys.stdout.write(replace_toc(args.readme, result, args.toc_start, args.toc_end))
else:
    sys.stdout.write(result)
sys.stdout.flush()

@timm
Copy link

timm commented Dec 4, 2022

I was wondering if I could write the above, a little shorter, using a bash script.

Not sure if this works perfectly, but sh toc.sh runs on the output of a find command

% find . -name "*.md"
./_includes/aa.md
./_includes/nn.md
./index.md
./lua.md
./assets/img/kk.md

e.g. sh toc.sh converts the above to...

- _includes
	- aa.md
	- nn.md
- index.md
- lua.md
	- img
		- kk.md

Here's the code for toc.sh

# Build an indented markdown list from the *.md files found below the current
# directory: find lists the paths, gawk turns each path into list lines, and
# uniq drops adjacent duplicate lines.
find . -name "*.md" | 
gawk '
# Split every input path into fields on "/".
BEGIN {FS="/"}
# sub() strips a leading "./" from $0 and returns the number of replacements,
# so the action below only runs for lines that started with "./".
sub(/^\.\//,"") { 
  # path = every field except the last, joined by "/" (with NF == 1 it is $1).
  path=$1
  for(i=2;i<NF;i++) path=path"/"$i
  # When path differs from the previous line, print a heading line made of
  # NF-2 tabs followed by the immediate parent field $(NF-1).
  # NOTE(review): only the immediate parent is printed, never higher ancestors.
  # NOTE(review): with NF == 1, $(NF-1) is $0, so this heading is identical to
  # the entry printed below; the trailing uniq appears to be what removes it.
  if (path!=old) {
    for(i=2;i<NF;i++) printf "\t";
    print "- " $(NF-1)
  } 
  # Entry for the file itself: NF-1 tabs followed by the last field.
  for(i=2;i<=NF;i++) printf "\t";
   print "- " $NF 
  old=path
} ' | 
uniq 

Planned extensions: for each .md file, look inside and grab the first heading.

@kyrylo-ushkalov-clearscale

Here are some bash and python scripts with properly URL-encoded file paths.

Bash:

#!/bin/bash
# Build an indented markdown list with links from the *.md files found below
# the current directory: find lists the paths, sort orders them, gawk turns
# each path into list lines, and uniq drops adjacent duplicate lines.
find . -name "*.md" | sort | gawk '
# Split every input path into fields on "/".
BEGIN {FS="/"}
# sub() strips a leading "./" from $0 and returns the number of replacements,
# so the action below only runs for lines that started with "./".
sub(/^\.\//,"") { 
  # path = every field except the last, each prefixed with "/"; it stays
  # empty when the line has a single field.
  path=""
  for (i=1; i<NF; i++) path=path"/"$i
  # When path differs from the previous line, print a heading line made of
  # NF-2 tabs followed by the immediate parent field $(NF-1).
  # NOTE(review): only the immediate parent is printed, never higher ancestors.
  # NOTE(review): with NF == 1, $(NF-1) is $0, so a single-field line that
  # follows a nested one prints the bare file name as a heading - confirm
  # whether that is intended.
  if (path != old) {
    for (i=2; i<NF; i++) printf "\t"
    print "- " $(NF-1)
  } 
  file = $NF
  # Link label: the file name without its trailing ".md".
  name = gensub(/\.md$/, "", "g", file)
  # NOTE(review): $0 already had its leading "./" removed by the sub() in the
  # pattern, so this gensub looks like a no-op.
  # NOTE(review): fullpath is printed unchanged; no percent-encoding is
  # applied anywhere in this script.
  fullpath = gensub(/^\.\//, "", "g", $0)
  # Entry for the file itself: NF-1 tabs followed by a markdown link.
  for (i=2; i<=NF; i++) printf "\t"
  print "- [" name "](" fullpath ")"
  old = path
}' | uniq

Python:

import os
import urllib.parse

def generate_toc(root_dir=".", exclude_dirs=frozenset({'.git', '.idea'}), level=0):
    """Recursively build markdown TOC lines for ``root_dir``.

    Args:
        root_dir: Directory to scan.
        exclude_dirs: Entry names (files or directories) to skip.
        level: Nesting depth; each level indents the entry by four spaces.

    Returns:
        A list of markdown list items. Directories come first and are
        emitted as plain labels followed by their own entries; ``.md`` files
        other than ``readme.md`` (any case) become links whose target is the
        URL-quoted path relative to the current working directory. Entries
        are ordered case-insensitively by name. A directory that cannot be
        listed because of missing permissions contributes no entries.
    """
    try:
        names = os.listdir(root_dir)
    except PermissionError:
        return []

    def directories_first(name):
        return (not os.path.isdir(os.path.join(root_dir, name)), name.lower())

    indent = "    " * level
    entries = []
    for item in sorted(names, key=directories_first):
        if item in exclude_dirs:
            continue
        full_path = os.path.join(root_dir, item)
        if os.path.isdir(full_path):
            entries.append(f"{indent}- {item}")
            entries += generate_toc(full_path, exclude_dirs, level + 1)
        elif item.endswith(".md") and item.lower() != "readme.md":
            name = os.path.splitext(item)[0]
            # URLs always use "/". Normalise the OS separator before quoting,
            # otherwise Windows backslashes are encoded as %5C and the link
            # breaks.
            rel_path = os.path.relpath(full_path).replace(os.sep, "/")
            url = urllib.parse.quote(rel_path)
            entries.append(f"{indent}- [{name}]({url})")
    return entries

if __name__ == "__main__":
    # Print the TOC of the current working directory, one entry per line.
    toc = generate_toc()
    print(*toc, sep="\n")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment