Skip to content

Instantly share code, notes, and snippets.

@jacklinke
Created July 15, 2024 23:13
Show Gist options
  • Save jacklinke/9225dc282dc16ecd1a6a11996d5505ab to your computer and use it in GitHub Desktop.
Save jacklinke/9225dc282dc16ecd1a6a11996d5505ab to your computer and use it in GitHub Desktop.
Script to add a `created` frontmatter property to any markdown file missing one
"""This script processes markdown files in the current directory and all subdirectories,
adding a `created` property to the frontmatter if it does not already exist.
Process:
1. Recursively identify markdown files (*.md) in the current folder and all subfolders.
2. Skip any directories specified in the `DIRS_TO_SKIP` tuple.
3. For each markdown file:
a. Check if the frontmatter has a 'created' property.
b. If 'created' exists, move on to the next file.
c. If 'created' does not exist:
i. If the filename contains a date in the format '%Y%m%d' or '%Y-%m-%d', set the 'created' property to that value formatted as '%Y-%m-%d'.
ii. Otherwise, if the file's 'Created' metadata is within the past 24 hours, assume it's an error and use the file's 'Modified' metadata attribute to populate the 'created' property, formatted in UTC as 'YYYY-MM-DD HH:MM:SSZ'.
iii. Otherwise, use the file's 'Created' metadata attribute to populate the 'created' property, formatted as 'YYYY-MM-DD HH:MM:SSZ'.
Usage:
1. Install the `python-frontmatter` package (imported as `frontmatter`) if not already present: `pip install python-frontmatter`.
2. Place the script in the root directory where your markdown files are located.
3. Run the script using `python add_created_property.py`.
"""
import os
import re
from datetime import datetime, timedelta, timezone

import frontmatter
# Directory names to prune from the walk. Entries are compared against each
# subdirectory's bare name (not its full path), so a match is skipped at any depth.
DIRS_TO_SKIP = ("_templates",)
def get_file_created_time(file_path):
    """Return the creation time of a file as a UTC timestamp string.

    The true creation ("birth") time is used where ``os.stat`` exposes it as
    ``st_birthtime``. Where it does not, the function falls back to ``st_ctime``
    (the value ``os.path.getctime`` returns); on Unix that is the time of the
    last metadata change rather than the creation time.

    Args:
        file_path (str): The path to the file.

    Returns:
        str: The timestamp formatted as 'YYYY-MM-DD HH:MM:SSZ' in UTC.

    Raises:
        OSError: If the file does not exist or cannot be inspected.
    """
    stat_result = os.stat(file_path)
    # st_birthtime is only present on some platforms; fall back when it is missing.
    try:
        created_time = stat_result.st_birthtime
    except AttributeError:
        created_time = stat_result.st_ctime
    # An aware UTC datetime replaces the deprecated datetime.utcfromtimestamp().
    return datetime.fromtimestamp(created_time, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%SZ")
def get_file_modified_time(file_path):
    """Return the last-modified time of a file as a UTC timestamp string.

    Args:
        file_path (str): The path to the file.

    Returns:
        str: The modification time formatted as 'YYYY-MM-DD HH:MM:SSZ' in UTC.

    Raises:
        OSError: If the file does not exist or cannot be inspected.
    """
    modified_time = os.path.getmtime(file_path)
    # An aware UTC datetime replaces the deprecated datetime.utcfromtimestamp();
    # the formatted output is the same.
    return datetime.fromtimestamp(modified_time, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%SZ")
def extract_date_from_filename(filename):
    """Extract the first valid date embedded in a filename.

    Two layouts are recognised, tried in this order: eight consecutive digits
    ('%Y%m%d') and a dashed date ('%Y-%m-%d'). Every occurrence of a layout is
    checked, so a digit run that is not a real calendar date (for example
    '99999999') does not hide a valid date later in the name.

    Args:
        filename (str): The name of the file.

    Returns:
        str: The date formatted as 'YYYY-MM-DD', or None if no valid date is found.
    """
    # Each regex is paired with the strptime format that parses its matches.
    date_layouts = (
        (r"\d{8}", "%Y%m%d"),
        (r"\d{4}-\d{2}-\d{2}", "%Y-%m-%d"),
    )
    for pattern, date_format in date_layouts:
        for match in re.finditer(pattern, filename):
            try:
                parsed = datetime.strptime(match.group(), date_format)
            except ValueError:
                # Right shape but not a real date (e.g. month 13); try the next candidate.
                continue
            # isoformat() always zero-pads the year to four digits, unlike
            # strftime("%Y") on some platforms.
            return parsed.date().isoformat()
    return None
def _created_timestamp(file_path):
    """Return the file's creation time as a POSIX timestamp.

    Uses ``st_birthtime`` where ``os.stat`` exposes it and falls back to
    ``st_ctime`` (the value ``os.path.getctime`` returns) elsewhere.
    """
    stat_result = os.stat(file_path)
    try:
        return stat_result.st_birthtime
    except AttributeError:
        return stat_result.st_ctime


def _resolve_created_value(file_name, file_path):
    """Choose the value for the 'created' property of a single markdown file."""
    filename_date = extract_date_from_filename(file_name)
    if filename_date:
        return filename_date

    created_time = datetime.fromtimestamp(_created_timestamp(file_path), tz=timezone.utc)
    # A creation time inside the last 24 hours is treated as unreliable, so the
    # modification time is used instead.
    if created_time >= datetime.now(timezone.utc) - timedelta(days=1):
        return get_file_modified_time(file_path)
    return created_time.strftime("%Y-%m-%d %H:%M:%SZ")


def process_markdown_files(directory):
    """Add a 'created' frontmatter property to every markdown file that lacks one.

    Walks ``directory`` recursively, pruning any subdirectory whose name is in
    ``DIRS_TO_SKIP``. Each ``*.md`` file without a 'created' property is rewritten
    in place with one added. The value is a date found in the filename if there
    is one, otherwise a UTC timestamp taken from the file's metadata.

    Failures are reported per file with ``print`` and do not stop the run.

    Args:
        directory (str): The root directory to start the search.
    """
    for root, dirs, files in os.walk(directory):
        # Editing dirs in place stops os.walk from descending into skipped folders.
        dirs[:] = [d for d in dirs if d not in DIRS_TO_SKIP]

        for file_name in files:
            if not file_name.endswith(".md"):
                continue
            file_path = os.path.join(root, file_name)

            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    post = frontmatter.load(f)
            except Exception as e:
                print(f"Error loading frontmatter from {file_path}: {e}")
                continue

            if "created" in post.metadata:
                print(f"'created' already exists in {file_path}")
                continue

            try:
                post.metadata["created"] = _resolve_created_value(file_name, file_path)
            except OSError as e:
                # The file may have vanished or become unreadable since the walk listed it.
                print(f"Error reading timestamps for {file_path}: {e}")
                continue

            try:
                # Serialize before opening for write, so a serialization failure
                # cannot leave the file truncated.
                post_content = frontmatter.dumps(post)
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(post_content)
            except Exception as e:
                print(f"Error writing to {file_path}: {e}")
            else:
                print(f"Added 'created' to {file_path}")
if __name__ == "__main__":
    # Process the directory the script was launched from.
    process_markdown_files(os.getcwd())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment