Created
July 15, 2024 23:13
-
-
Save jacklinke/9225dc282dc16ecd1a6a11996d5505ab to your computer and use it in GitHub Desktop.
Script to add a `created` frontmatter property to any markdown file missing one
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""This script processes markdown files in the current directory and all subdirectories, | |
adding a `created` property to the frontmatter if it does not already exist. | |
Process: | |
1. Recursively identify markdown files (*.md) in the current folder and all subfolders. | |
2. Skip any directories specified in the `DIRS_TO_SKIP` tuple. | |
3. For each markdown file: | |
a. Check if the frontmatter has a 'created' property. | |
b. If 'created' exists, move on to the next file. | |
c. If 'created' does not exist: | |
i. If the filename contains a date in the format '%Y%m%d' or '%Y-%m-%d', set the 'created' property to that value formatted as '%Y-%m-%d'. | |
ii. If the file's 'Created' metadata is within the past 24 hours, assume it's an error and use the file's 'Modified' metadata attribute to populate the 'created' property, formatted as 'YYYY-MM-DD HH:MM:SSZ'. | |
iii. Otherwise, use the file's 'Created' metadata attribute to populate the 'created' property, formatted as 'YYYY-MM-DD HH:MM:SSZ'. | |
Usage: | |
1. Install frontmatter if not already present: `pip install python-frontmatter`. | |
2. Place the script in the root directory where your markdown files are located. | |
3. Run the script using `python add_created_property.py`. | |
""" | |
import os
import re
from datetime import datetime, timedelta, timezone

import frontmatter
# Directories to skip. An entry is compared against each directory's own name
# (not its full path), so a match is pruned wherever it occurs in the tree.
DIRS_TO_SKIP = ("_templates",)
def get_file_created_time(file_path):
    """Return the creation time of a file as a UTC timestamp string.

    The file's birth time (``st_birthtime``) is used when the platform
    exposes it. Otherwise the function falls back to ``st_ctime``, the value
    ``os.path.getctime`` reports; on Unix that is the time of the last
    metadata change rather than a true creation time.

    Args:
        file_path (str): The path to the file.

    Returns:
        str: The timestamp formatted as 'YYYY-MM-DD HH:MM:SSZ' (UTC).

    Raises:
        OSError: If the file does not exist or cannot be inspected.
    """
    file_stat = os.stat(file_path)
    try:
        created_time = file_stat.st_birthtime
    except AttributeError:
        # No birth time on this platform/Python build; use the ctime field.
        created_time = file_stat.st_ctime
    # Timezone-aware conversion replaces the deprecated utcfromtimestamp().
    return datetime.fromtimestamp(created_time, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%SZ")
def get_file_modified_time(file_path):
    """Return the last-modified time of a file as a UTC timestamp string.

    Args:
        file_path (str): The path to the file.

    Returns:
        str: The modification time formatted as 'YYYY-MM-DD HH:MM:SSZ' (UTC).

    Raises:
        OSError: If the file does not exist or cannot be inspected.
    """
    modified_time = os.path.getmtime(file_path)
    # Timezone-aware conversion replaces the deprecated utcfromtimestamp().
    return datetime.fromtimestamp(modified_time, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%SZ")
def extract_date_from_filename(filename):
    """Extract a calendar date embedded in a filename.

    Two spellings are recognised, tried in this order: eight consecutive
    digits ('%Y%m%d') and the dashed form ('%Y-%m-%d'). Every occurrence of
    a pattern is examined, so a digit run that is not a real date (for
    example '99999999') does not hide a valid date later in the name.

    Args:
        filename (str): The name of the file.

    Returns:
        str: The first valid date found, formatted as '%Y-%m-%d', or None if
        the filename contains no valid date.
    """
    # Each regex is paired with the strptime format that validates its matches.
    date_patterns = (
        (r"\d{8}", "%Y%m%d"),
        (r"\d{4}-\d{2}-\d{2}", "%Y-%m-%d"),
    )
    for pattern, date_format in date_patterns:
        for match in re.finditer(pattern, filename):
            try:
                parsed = datetime.strptime(match.group(), date_format)
            except ValueError:
                # Shaped like a date but not a real one (e.g. month 13).
                continue
            return parsed.strftime("%Y-%m-%d")
    return None
def _pick_created_value(file_name, file_path):
    """Choose the value for a missing 'created' property.

    Preference order: a date embedded in the filename; the file's modified
    time when its creation timestamp falls within the last 24 hours (treated
    as unreliable); otherwise the creation timestamp itself.

    Raises:
        OSError: If the file's timestamps cannot be read.
    """
    filename_date = extract_date_from_filename(file_name)
    if filename_date:
        return filename_date

    file_stat = os.stat(file_path)
    try:
        # Birth time is the real creation time where the platform exposes it.
        created_ts = file_stat.st_birthtime
    except AttributeError:
        # Same value os.path.getctime() returns.
        created_ts = file_stat.st_ctime
    created_time = datetime.fromtimestamp(created_ts, tz=timezone.utc)

    # Aware datetimes on both sides; replaces the deprecated utcnow().
    if created_time >= datetime.now(timezone.utc) - timedelta(days=1):
        return get_file_modified_time(file_path)
    return created_time.strftime("%Y-%m-%d %H:%M:%SZ")


def process_markdown_files(directory):
    """Recursively add a 'created' frontmatter property to markdown files.

    Walks `directory`, skipping any directory whose name is listed in
    `DIRS_TO_SKIP`. Each '*.md' file whose frontmatter lacks 'created' is
    rewritten in place with the property added. Files that cannot be read,
    inspected or written are reported on stdout and skipped.

    Args:
        directory (str): The root directory to start the search.
    """
    for root, dirs, files in os.walk(directory):
        # Assigning to the slice mutates the list os.walk iterates over, so
        # the skipped directories are never descended into.
        dirs[:] = [d for d in dirs if d not in DIRS_TO_SKIP]
        for file in files:
            if not file.endswith(".md"):
                continue
            file_path = os.path.join(root, file)

            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    post = frontmatter.load(f)
            except Exception as e:
                # Best effort: one unreadable or malformed file must not
                # abort the whole run.
                print(f"Error loading frontmatter from {file_path}: {e}")
                continue

            if "created" in post.metadata:
                print(f"'created' already exists in {file_path}")
                continue

            try:
                post.metadata["created"] = _pick_created_value(file, file_path)
            except OSError as e:
                print(f"Error reading timestamps of {file_path}: {e}")
                continue

            post_content = frontmatter.dumps(post)
            try:
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(post_content)
            except Exception as e:
                print(f"Error writing to {file_path}: {e}")
            else:
                print(f"Added 'created' to {file_path}")
if __name__ == "__main__":
    # Operate on the directory the script is launched from.
    process_markdown_files(os.getcwd())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment