Created
October 9, 2024 15:37
-
-
Save esweeney-cg/9d95d30906e7788433c682273f6192ca to your computer and use it in GitHub Desktop.
Convert Google Keep files from Takeout JSON to Markdown for zet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import json | |
| import datetime | |
| import random | |
| import string | |
| import re | |
def format_date_for_filename(timestamp_usec):
    """Return the ``YYYY-MM-DD`` date string for a microsecond Unix timestamp.

    The timestamp is interpreted in the machine's local time zone, which is
    what ``datetime.datetime.fromtimestamp`` does when no ``tz`` is passed.
    """
    seconds_since_epoch = timestamp_usec / 1e6
    moment = datetime.datetime.fromtimestamp(seconds_since_epoch)
    # A format spec on a datetime is forwarded to strftime().
    return f"{moment:%Y-%m-%d}"
def generate_random_suffix(length=3):
    """Return ``length`` random characters drawn from ``a-z`` and ``0-9``."""
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)
def extract_markdown_content(note):
    """Build the Markdown document for one parsed note.

    Args:
        note: Dict parsed from a note's JSON file. The keys read here are
            ``title``, ``textContent``, ``listContent`` and
            ``createdTimestampUsec``; every one of them may be absent.

    Returns:
        A ``(md_content, title, date_str)`` tuple where ``md_content`` is the
        Markdown text with a front-matter block holding the title, ``title``
        is the note title (``"Untitled"`` when missing or empty) and
        ``date_str`` is the creation date formatted by
        ``format_date_for_filename``.
    """
    title = note.get("title") or "Untitled"

    # The body comes from textContent when present, otherwise from the
    # entries of listContent, one entry per line.
    if "textContent" in note:
        content = note["textContent"]
    elif "listContent" in note:
        # Entries without a usable "text" value become empty lines instead of
        # aborting the whole conversion with a KeyError/TypeError.
        content = "\n".join(
            item.get("text") or "" for item in note["listContent"]
        )
    else:
        content = "No content available."

    # A missing or null timestamp falls back to 0 (the Unix epoch) so the
    # date formatting below never receives None.
    timestamp_usec = note.get("createdTimestampUsec") or 0
    date_str = format_date_for_filename(timestamp_usec)

    # NOTE(review): the title is written unquoted into the front matter;
    # titles containing characters such as ": " may need quoting — confirm
    # with whatever consumes these files.
    md_content = f"---\ntitle: {title}\n---\n\n{content}\n"
    return md_content, title, date_str
def sanitize_filename(title):
    """Return ``title`` with spaces and filename-unsafe characters replaced by ``_``."""
    # Space plus the characters \ / * ? : " < > | each map to an underscore.
    unsafe_chars = ' \\/*?:"<>|'
    replacement_table = str.maketrans(unsafe_chars, "_" * len(unsafe_chars))
    return title.translate(replacement_table)
def json_to_markdown(filename):
    """Convert a single note JSON file into a Markdown file.

    The output is created in the current working directory and is named
    ``<base>-<suffix>.md``: ``<base>`` is the sanitized note title, or the
    creation date when the note has no title, and ``<suffix>`` is a short
    random string. The name of the created file is printed.

    Args:
        filename: Path of the JSON file to read.

    Raises:
        FileExistsError: If every randomly suffixed candidate name tried
            already exists.
    """
    # Decode as UTF-8 explicitly; relying on the platform default encoding
    # can garble or reject non-ASCII note text.
    with open(filename, "r", encoding="utf-8") as f:
        data = json.load(f)

    md_content, title, date_str = extract_markdown_content(data)
    sanitized_title = sanitize_filename(title)

    # Untitled notes are named after their creation date instead.
    base_filename = date_str if sanitized_title == "Untitled" else sanitized_title

    # The random suffix is short, so two notes sharing a base name can draw
    # the same suffix. Exclusive-create mode ("x") refuses to overwrite an
    # existing file; on a clash a fresh suffix is drawn.
    max_attempts = 100
    for _ in range(max_attempts):
        md_filename = f"{base_filename}-{generate_random_suffix()}.md"
        try:
            md_file = open(md_filename, "x", encoding="utf-8")
        except FileExistsError:
            continue
        with md_file:
            md_file.write(md_content)
        print(f"Created {md_filename}")
        return

    raise FileExistsError(
        f"Could not find an unused filename for {base_filename!r}"
    )
def process_json_files():
    """Convert every entry of the current directory whose name ends in ``.json``."""
    json_names = [entry for entry in os.listdir() if entry.endswith(".json")]
    for json_name in json_names:
        json_to_markdown(json_name)
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    process_json_files()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment