Create an `Index.md` file for an Obsidian vault. Edit the script to set the path to your vault.
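The only edit you normally need is the vault path constant near the top of the script; the path shown below is just a placeholder, so point it at your own vault root:

VAULT_PATH: str = '/path/to/your/ObsidianVault/'

You can also tune `EXCLUDE_FOLDERS`, `EMOJI_MAPPING`, and the other configuration constants to match your vault's layout.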
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Michael Thomason <[email protected]>
# Copyright (C) 2025 Michael Thomason, All rights reserved.
# License: MIT
# Obsidian Vault Index Generator
#
# This script generates an index for an Obsidian vault by scanning all markdown files
# in a specified directory. It organizes H1 and H2 headings in a nested structure.
#
# Usage:
# 1. Set the `VAULT_PATH` variable to the root directory of your Obsidian vault.
# 2. Run this script. It will create or update an `Index.md` file in the vault.
# 3. Open `Index.md` in Obsidian to view the generated index.
# 4. Set the `PRINT_WARNING` variable to `False` unless you have a warning file.
#
# License:
# This code is free to use under the MIT License. See the LICENSE file for details.
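#
# Example invocation (the script file name below is only illustrative; use whatever
# name you saved this gist under):
#   python3 obsidian_index_generator.py
# Each run overwrites `Index.md` in `VAULT_PATH` and logs progress at the configured LOG_LEVEL.
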
import os
import re
import logging
from datetime import datetime
from calendar import month_name
# Configuration Constants
VAULT_PATH: str = '/Users/michael/Projects/scratch/notes/Notes/'
INDEX_FILE_NAME: str = 'Index.md'
FILE_EXTENSION: str = '.md' # File extension to process
DAILY_NOTES_FOLDER: str = 'DailyNotes' # Name of daily notes folder
FOLDER_THRESHOLD: int = 50 # Threshold for applying custom groupings
LOG_LEVEL: str = 'INFO' # Logging level: DEBUG, INFO, WARNING, ERROR
PRINT_WARNING: bool = True # Embed the warning note (Random/Warning) at the top of the index
EXCLUDE_FOLDERS: list[str] = ['Templates', 'Archive', 'Images'] # Folders to exclude from indexing
INCLUDE_STATISTICS: bool = False # Flag to enable/disable document statistics
SKIP_ROOT: bool = True # Skip markdown files directly in the vault root folder
EMOJI_MAPPING: dict[str, str] = {
    'DailyNotes': '📅',
    'Projects': '📂',
    'Work': '💼',
    'Personal': '👤',
    'Ideas': '💡',
    'Gpt': '🤖',
    'Images': '❓',
    'Random': '💡',
    '3rdParty': '🏢'
} # Emoji mapping for folder icons

logging.basicConfig(
    level=getattr(logging, LOG_LEVEL),
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

def get_emoji_for_folder(folder_name: str) -> str:
    """Get the emoji for a folder by checking each part of the path against the EMOJI_MAPPING."""
    for part in folder_name.split('/'):
        if part in EMOJI_MAPPING:
            return EMOJI_MAPPING[part]
    return '📄' # Default emoji if no match is found

def find_markdown_files(vault_path: str, extension: str = FILE_EXTENSION) -> list[str]:
    """Recursively find all markdown files in the vault, excluding files in the root folder."""
    logging.info("Scanning for markdown files in the vault...")
    markdown_files: list[str] = []
    for root, dirs, files in os.walk(vault_path):
        # Exclude specified folders (pruned before the root check so they are
        # skipped even when they sit directly under the vault root)
        dirs[:] = [d for d in dirs if d not in EXCLUDE_FOLDERS]
        # Skip files in the root folder
        if SKIP_ROOT and root == vault_path:
            logging.debug(f"Skipping files in the root folder: {root}")
            continue # Skip processing files in the root folder
        for file in files:
            if file.endswith(extension):
                markdown_files.append(os.path.join(root, file))
    logging.info(f"Found {len(markdown_files)} markdown files.")
    return sorted(markdown_files, key=lambda x: os.path.basename(x), reverse=True)

def extract_headings(file_path: str) -> dict[str, list[str | tuple[str, str]]]:
    """Extract H1 and H2 headings from a markdown file."""
    logging.debug(f"Extracting headings from {file_path}...")
    headings: dict[str, list[str | tuple[str, str]]] = {"h1": [], "h2": []}
    h1_pattern = re.compile(r'^#\s+(.*)')
    h2_pattern = re.compile(r'^##\s+(.*)')
    current_h1 = None
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                h1_match = h1_pattern.match(line)
                h2_match = h2_pattern.match(line)
                if h1_match:
                    current_h1 = h1_match.group(1).strip()
                    headings["h1"].append(current_h1)
                elif h2_match and current_h1 is not None:
                    headings["h2"].append((current_h1, h2_match.group(1).strip()))
    except Exception as e:
        logging.error(f"Error reading file {file_path}: {e}")
    return headings

def group_files_by_folder(files: list[str]) -> dict[str, list[str]]:
    """Group files by their containing folder, preserving hierarchy."""
    grouped_files: dict[str, list[str]] = {}
    for file_path in files:
        # Get relative path from vault root
        relative_path = os.path.relpath(file_path, VAULT_PATH)
        folder_name = os.path.dirname(relative_path)
        # Skip files in the root directory (dirname is empty for root-level files)
        if folder_name in ('', '.'):
            continue
        grouped_files.setdefault(folder_name, []).append(file_path)
    # Sort files in the DailyNotes folder by filename in descending order
    if DAILY_NOTES_FOLDER in grouped_files:
        grouped_files[DAILY_NOTES_FOLDER].sort(key=lambda x: os.path.basename(x), reverse=True)
    return grouped_files

def get_file_metadata(file_path: str) -> tuple[int, str]:
    """Get file size and last modified date."""
    file_size = os.path.getsize(file_path)
    last_modified = datetime.fromtimestamp(os.path.getmtime(file_path)).strftime('%Y-%m-%d %H:%M:%S')
    return file_size, last_modified

def pretty_print_month(year_month: str) -> str:
    """Convert a year-month string (e.g., '2023-07') to a pretty format (e.g., 'July 2023')."""
    year, month = year_month.split('-')
    return f"{month_name[int(month)]} {year}"

def write_grouped_index(index_file, folder_name: str, files: list[str], headings_extractor, grouping_func, pretty_print: bool = False) -> None:
    """Write a grouped index for a specific folder."""
    anchor_name = folder_name.lower().replace(" ", "-").replace("/", "-")
    emoji = get_emoji_for_folder(folder_name)
    grouped_files = grouping_func(files)
    # Handle sorted list of tuples
    for group_key, group_files in grouped_files:
        pretty_key = pretty_print_month(group_key) if pretty_print else group_key
        index_file.write(f'### {pretty_key}\n\n')
        for file_path in group_files:
            file_name = os.path.basename(file_path)
            # removesuffix() strips only a trailing ".md", not individual characters
            index_file.write(f'* [[{file_name}|{file_name.removesuffix(".md")}]]\n')
            if INCLUDE_STATISTICS:
                file_size, last_modified = get_file_metadata(file_path)
                index_file.write(f'  *Size: {file_size / 1024:.2f} KB, Last Modified: {last_modified}*\n')
            headings = headings_extractor(file_path)
            for h1 in headings["h1"]:
                index_file.write(f'  * [[{file_name}#{h1}|{h1}]]\n')
                for h2_tuple in filter(lambda x: x[0] == h1, headings["h2"]):
                    index_file.write(f'    * [[{file_name}#{h2_tuple[1]}|{h2_tuple[1]}]]\n')
        index_file.write('\n')

def group_by_month(file_paths: list[str]) -> list[tuple[str, list[str]]]:
    """Group files by year-month and return a sorted list of tuples in descending order."""
    grouped: dict[str, list[str]] = {}
    for file_path in file_paths:
        # Files without a YYYY-MM date in their name are omitted from the grouping
        match = re.search(r'(\d{4})-(\d{2})', os.path.basename(file_path))
        if match:
            year_month = f"{match.group(1)}-{match.group(2)}"
            grouped.setdefault(year_month, []).append(file_path)
    # Sort the groups (year-month keys) in descending order
    sorted_groups = sorted(
        grouped.items(),
        key=lambda x: datetime.strptime(x[0], "%Y-%m"),
        reverse=True
    )
    return sorted_groups

def group_by_alphabet(file_paths: list[str]) -> list[tuple[str, list[str]]]:
    """Group files by their starting letter and return a sorted list of tuples."""
    grouped: dict[str, list[str]] = {}
    for file_path in file_paths:
        first_letter = os.path.basename(file_path)[0].upper()
        grouped.setdefault(first_letter, []).append(file_path)
    # Sort the groups (alphabet keys) in ascending order
    sorted_groups = sorted(grouped.items(), key=lambda x: x[0])
    return sorted_groups

def create_index(vault_path: str, index_file_path: str) -> None:
    """Create an index file for the vault."""
    markdown_files = find_markdown_files(vault_path)
    grouped_files = group_files_by_folder(markdown_files)
    with open(index_file_path, 'w', encoding='utf-8') as index_file:
        # Vault Index
        if PRINT_WARNING:
            index_file.write("![[Random/Warning|Warning]]\n\n---\n")
        index_file.write('# Vault Index\n\n')
        index_file.write('_This file is autogenerated. Last updated: '
                         f'{datetime.now().strftime("%Y-%m-%d %H:%M:%S")}_\n\n')
        # Folder Summary
        index_file.write('## Folders\n\n')
        for folder_name, files in sorted(grouped_files.items()):
            emoji = get_emoji_for_folder(folder_name)
            # Use the exact folder name (including slashes) for the anchor
            anchor_name = f"{emoji} {folder_name}"
            index_file.write(f'* [[#{anchor_name}|{emoji} {folder_name}]] ({len(files)} files)\n')
        index_file.write('\n---\n\n')
        # File Details
        total_files = 0
        total_size = 0
        for folder_name, file_paths in sorted(grouped_files.items()):
            total_files += len(file_paths)
            total_size += sum(os.path.getsize(file_path) for file_path in file_paths)
            # Use the exact folder name (including slashes) for the heading
            emoji = get_emoji_for_folder(folder_name)
            anchor_name = f"{emoji} {folder_name}"
            # Specialized groupings for threshold or DailyNotes folder
            if folder_name == DAILY_NOTES_FOLDER:
                index_file.write(f'## {emoji} {folder_name}\n\n')
                #index_file.write(f'## {emoji} {folder_name}-Index\n\n')
                write_grouped_index(index_file, folder_name, file_paths, extract_headings, group_by_month, pretty_print=True)
            elif len(file_paths) > FOLDER_THRESHOLD:
                index_file.write(f'## {emoji} {folder_name}\n\n')
                #index_file.write(f'## {emoji} {folder_name}-Index\n\n')
                write_grouped_index(index_file, folder_name, file_paths, extract_headings, group_by_alphabet)
            else:
                # Standard output for other folders
                index_file.write(f'## {emoji} {folder_name}\n\n')
                for file_path in file_paths:
                    file_name = os.path.basename(file_path)
                    index_file.write(f'### [[{file_name}|{file_name.removesuffix(".md")}]]\n')
                    if INCLUDE_STATISTICS:
                        file_size, last_modified = get_file_metadata(file_path)
                        index_file.write(f'*Size: {file_size / 1024:.2f} KB, Last Modified: {last_modified}*\n')
                    headings = extract_headings(file_path)
                    for h1 in headings["h1"]:
                        index_file.write(f'* [[{file_name}#{h1}|{h1}]]\n')
                        for h2_tuple in filter(lambda x: x[0] == h1, headings["h2"]):
                            index_file.write(f'  * [[{file_name}#{h2_tuple[1]}|{h2_tuple[1]}]]\n')
                    index_file.write('\n')
        # Summary
        if INCLUDE_STATISTICS:
            index_file.write('---\n\n')
            index_file.write('## Summary\n\n')
            index_file.write(f'- Total folders: {len(grouped_files)}\n')
            index_file.write(f'- Total files: {total_files}\n')
            index_file.write(f'- Total size: {total_size / (1024 * 1024):.2f} MB\n')
    logging.info(f"Index file created successfully at {index_file_path}")

if __name__ == '__main__':
    index_file_path = os.path.join(VAULT_PATH, INDEX_FILE_NAME)
    create_index(VAULT_PATH, index_file_path)
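
For reference, here is a rough sketch of the kind of `Index.md` the script produces. The folder and note names below are made up for illustration; the real content depends on your vault:

  # Vault Index

  _This file is autogenerated. Last updated: 2025-01-21 12:00:00_

  ## Folders

  * [[#📂 Projects|📂 Projects]] (2 files)
  * [[#📅 DailyNotes|📅 DailyNotes]] (31 files)

  ---

  ## 📂 Projects

  ### [[SomeNote.md|SomeNote]]
  * [[SomeNote.md#First Heading|First Heading]]
    * [[SomeNote.md#Some Subheading|Some Subheading]]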