|
#!/usr/bin/env python3 |
|
import sys |
|
import json |
|
import datetime |
|
import html |
|
|
|
def sanitize(text):
    """Make *text* safe to place inside a single Markdown table cell.

    HTML special characters (``&``, ``<``, ``>`` and both quote characters)
    are replaced with entities so that no raw HTML reaches the output, line
    breaks are collapsed to spaces because a table row must stay on one
    line, and pipe characters are backslash-escaped so they are not read as
    column separators.

    Args:
        text: The value to sanitize. ``None`` is treated as an empty string
            and any other non-string value is converted with ``str()``.

    Returns:
        The sanitized string.
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        text = str(text)

    # "\r\n" goes first so a Windows line ending becomes one space, not two.
    single_line = text.replace("\r\n", " ").replace("\r", " ").replace("\n", " ")

    # Escape HTML before adding the pipe escapes; html.escape leaves
    # backslashes and pipes alone, so the two steps do not interfere.
    escaped = html.escape(single_line, quote=True)
    return escaped.replace("|", "\\|")
|
|
|
def _escape_link_text(text):
    """Backslash-escape characters that could close or nest a Markdown link label."""
    # Backslashes first, so the escapes added for the brackets are not doubled.
    return text.replace("\\", "\\\\").replace("[", "\\[").replace("]", "\\]")


def main():
    """Convert a JSON bookmark export on stdin into a Markdown table on stdout.

    Standard input is parsed as JSON and iterated; each item is read through
    ``item.get`` for the keys ``href``, ``description``, ``time`` and
    ``tags``. A missing or ``null`` value is treated as an empty string. One
    table row is printed per item with three columns: Bookmark (an inline
    link whose label is the description, or the URL when the description is
    empty), Date (``YYYY-MM-DD``) and Tags (comma-separated).

    Raises:
        json.JSONDecodeError: If standard input is not valid JSON.
    """
    time_format = "%Y-%m-%dT%H:%M:%SZ"

    data = json.load(sys.stdin)

    print("| Bookmark | Date | Tags |")
    print("| -------- | ---- | ---- |")

    for item in data:
        # ``or ""`` also covers keys that are present but null in the JSON;
        # ``dict.get`` with a default would hand those through as None.
        raw_url = item.get("href") or ""
        raw_description = item.get("description") or raw_url
        raw_time = item.get("time") or ""
        raw_tags = item.get("tags") or ""

        try:
            parsed = datetime.datetime.strptime(raw_time, time_format)
        except (TypeError, ValueError):
            # Unparseable timestamp: keep its first ten characters, but
            # sanitize them, since this is raw input headed for a table cell.
            date = sanitize(str(raw_time)[:10])
        else:
            date = parsed.strftime("%Y-%m-%d")

        safe_url = sanitize(raw_url)
        # Escape brackets before sanitizing so a description cannot end the
        # link label early or smuggle in a nested link.
        safe_description = sanitize(_escape_link_text(raw_description))
        # Angle brackets around the destination let the URL contain spaces
        # and parentheses without confusing the Markdown parser.
        bookmark = f"[{safe_description}](<{safe_url}>)"

        # Tags arrive as one whitespace-separated string.
        tags = ", ".join(sanitize(tag) for tag in raw_tags.split()) if raw_tags else ""

        print(f"| {bookmark} | {date} | {tags} |")
|
|
|
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()