Skip to content

Instantly share code, notes, and snippets.

@esweeney-cg
Created October 9, 2024 15:37
Show Gist options
  • Save esweeney-cg/9d95d30906e7788433c682273f6192ca to your computer and use it in GitHub Desktop.
Save esweeney-cg/9d95d30906e7788433c682273f6192ca to your computer and use it in GitHub Desktop.
Convert Google Keep files from Takeout JSON to Markdown for zet.
import os
import json
import datetime
import random
import string
import re
def format_date_for_filename(timestamp_usec):
    """Return the date of a microsecond epoch timestamp as ``YYYY-MM-DD``.

    The timestamp is interpreted by ``datetime.fromtimestamp`` without a
    timezone argument, so the resulting date is in the machine's local time.
    """
    seconds = timestamp_usec / 1e6  # microseconds -> seconds
    moment = datetime.datetime.fromtimestamp(seconds)
    return moment.strftime("%Y-%m-%d")
def generate_random_suffix(length=3):
    """Build a random string of ``length`` lowercase letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)
def extract_markdown_content(note):
    """Render a parsed note dict as Markdown with a title front-matter block.

    Returns a ``(markdown_text, title, date_string)`` tuple. A missing or
    empty title becomes ``"Untitled"``; the date string is derived from the
    note's ``createdTimestampUsec`` value (0 when absent).
    """
    heading = note.get("title") or "Untitled"

    # The body comes from textContent if present, otherwise from the list items.
    if "textContent" in note:
        body = note["textContent"]
    elif "listContent" in note:
        body = "\n".join(entry["text"] for entry in note["listContent"])
    else:
        body = "No content available."

    created = format_date_for_filename(note.get("createdTimestampUsec", 0))

    front_matter = f"---\ntitle: {heading}\n---\n\n"
    return f"{front_matter}{body}\n", heading, created
def sanitize_filename(title):
    """Return ``title`` with spaces and filename-unsafe characters turned into underscores."""
    # Space plus the characters \ / * ? : " < > | are each mapped to "_".
    unsafe_chars = ' \\/*?:"<>|'
    replacements = str.maketrans({char: "_" for char in unsafe_chars})
    return title.translate(replacements)
def json_to_markdown(filename):
    """Convert a single JSON note file into a Markdown file.

    The note is read from ``filename``, rendered with
    ``extract_markdown_content`` and written to
    ``<base>-<random suffix>.md`` in the current working directory, where
    ``<base>`` is the sanitized title, or the note's date when the title is
    missing/empty (i.e. resolves to ``"Untitled"``).

    Args:
        filename: Path of the JSON file to convert.

    Raises:
        FileExistsError: If no unused output filename could be found after
            several random suffixes.
    """
    # Read as UTF-8 explicitly so non-ASCII notes do not depend on the
    # platform's default encoding.
    with open(filename, "r", encoding="utf-8") as f:
        data = json.load(f)

    md_content, title, date_str = extract_markdown_content(data)
    sanitized_title = sanitize_filename(title)

    # Untitled notes are named after their date instead.
    base_filename = date_str if sanitized_title == "Untitled" else sanitized_title

    # The random suffix alone does not guarantee uniqueness, so create the
    # file in exclusive mode and retry with a new suffix on a collision
    # rather than silently overwriting an earlier note.
    for _ in range(100):
        md_filename = f"{base_filename}-{generate_random_suffix()}.md"
        try:
            with open(md_filename, "x", encoding="utf-8") as md_file:
                md_file.write(md_content)
        except FileExistsError:
            continue
        break
    else:
        raise FileExistsError(
            f"Could not find an unused filename for base {base_filename!r}"
        )

    print(f"Created {md_filename}")
def process_json_files():
    """Convert every ``.json`` file found in the current directory to Markdown."""
    json_names = (entry for entry in os.listdir() if entry.endswith(".json"))
    for json_name in json_names:
        json_to_markdown(json_name)
# Run the conversion only when this file is executed as a script, so that
# importing it does not touch the current directory.
if __name__ == "__main__":
    process_json_files()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment