Last active
October 4, 2025 02:00
-
-
Save lbmaian/24f0dd0bb5d09c51ba46c050f1971e7d to your computer and use it in GitHub Desktop.
Encodes/decodes and merges one or more watchmarker database/JSON files depending on --format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Encode/decode and merge one or more watchmarker database/JSON files.

Each input file is either plain JSON or base64-encoded UTF-8 JSON holding an
array of objects keyed by ``strIdent``. All inputs are merged and the result is
written to stdout in the representation selected by ``--format``.
"""
import argparse
import sys
import os
import base64
import json
import inspect

argparser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description='Encodes/decodes and merges one or more watchmarker database/JSON files depending on --format',
    epilog=inspect.cleandoc(
        '''
        File format: plain JSON (typically .json file), or base64-encoded UTF-8 JSON (typically .database file),
        containing an array of objects, each representing a watched video with keys:
            strIdent, intTimestamp, strTitle, intCount
        If multiple files are specified, their decoded JSON arrays are concatenated, then reencoded into base64.
        If objects with the same strIdent exist across multiple files, they're merged into the earliest conflicting object such that:
            strTitle: unchanged (effectively preferring the object in the earliest specified file)
            intTimestamp: lowest value
            intCount: highest value
        Example usage:
            Reads plain JSON database (.json) and output in base64-encoded JSON:
                %(prog)s youtube-watchmarker-2025-09-29.database.json
            Reads base64-encoded JSON (.database) and output in pretty-printed JSON:
                %(prog)s --format json watchmarker.2025.09.24.database
            Merge base64-encoded JSON database with plain JSON database and output in base64-encoded JSON:
                %(prog)s watchmarker.2025.09.24.database youtube-watchmarker-2025-09-29.database.json
            Same as above but output in pretty-printed JSON:
                %(prog)s --format json watchmarker.2025.09.24.database youtube-watchmarker-2025-09-29.database.json
        '''))
argparser.add_argument('--format', choices=['json', 'database'], default='database',
    help='if "json", outputs pretty-printed JSON; if "database" (default), outputs base64-encoded UTF-8 JSON')
argparser.add_argument('filename', nargs='+',
    help='JSON file or base64-encoded UTF-8 JSON file (multiple can be specified)')

# Note on watchmarker's DB codec JS implementation:
# encoding: btoa(unescape(encodeURIComponent(JSON.stringify(...))))
# decoding: JSON.parse(decodeURIComponent(escape(atob(...))))
# The unescape(encodeURIComponent(...)) and decodeURIComponent(escape(...)) are a workaround
# for btoa/atob's inability to handle Unicode code points whose UTF-8 encoding is larger than a byte.
# The workaround effectively encodes the original string as individual UTF-8 bytes, while decoding reverses this.
# Python's base64 works with byte strings directly and so doesn't need such workarounds.


def decode_entries(data):
    """Decode raw file content into a list of entries.

    Args:
        data: Raw bytes, either plain JSON or base64-encoded UTF-8 JSON.

    Returns:
        The decoded JSON array as a list.

    Raises:
        ValueError: If the content is neither form, or the decoded top-level
            value is not an array (json.JSONDecodeError and binascii.Error are
            both ValueError subclasses).
    """
    # Plain JSON and base64-encoded JSON have no possible overlap for array
    # content, so try plain JSON first, then fall back to base64.
    try:
        # json.loads accepts UTF-8 byte strings, so no decode('utf-8') needed.
        entries = json.loads(data)
    except json.JSONDecodeError:
        entries = json.loads(base64.b64decode(data))
    if not isinstance(entries, list):
        raise ValueError('expected a JSON array of objects')
    return entries


def load_entries(filename):
    """Read *filename* and return its decoded list of entries.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If the content cannot be decoded; the message names the file.
    """
    with open(filename, 'rb') as f:
        data = f.read()
    try:
        return decode_entries(data)
    except ValueError as err:
        raise ValueError(f'{filename}: {err}') from err


def merge_entries(entry_lists):
    """Merge entries from several lists, keyed by ``strIdent``.

    The first entry seen for a given ``strIdent`` is kept (and mutated in
    place); later duplicates only lower its ``intTimestamp`` and raise its
    ``intCount``. All other keys, including ``strTitle``, stay as in the first
    occurrence.

    Args:
        entry_lists: Iterable of lists of entry dicts, in priority order.

    Returns:
        List of merged entry dicts in order of first appearance.
    """
    merged = {}
    for entries in entry_lists:
        for entry in entries:
            ident = entry['strIdent']
            existing = merged.get(ident)
            if existing is None:
                merged[ident] = entry
                continue
            existing['intTimestamp'] = min(existing['intTimestamp'], entry['intTimestamp'])
            existing['intCount'] = max(existing['intCount'], entry['intCount'])
    return list(merged.values())


def encode_entries(entries, output_format):
    """Serialize *entries* to the text written to stdout.

    Args:
        entries: List of entry dicts.
        output_format: 'json' for tab-indented JSON followed by a newline;
            any other value yields base64-encoded UTF-8 JSON with no newline.

    Returns:
        The serialized output as a str.
    """
    if output_format == 'json':
        return json.dumps(entries, ensure_ascii=False, indent='\t') + '\n'
    compact_json = json.dumps(entries, ensure_ascii=False)
    return base64.b64encode(compact_json.encode('utf-8')).decode('ascii')


def main(argv=None):
    """Parse arguments, merge all input files and write the result to stdout.

    Args:
        argv: Argument list to parse; None means use sys.argv[1:].
    """
    args = argparser.parse_args(argv)
    merged_entries = merge_entries(load_entries(filename) for filename in args.filename)
    output = encode_entries(merged_entries, args.format)
    try:
        sys.stdout.write(output)
        # Flush here so a closed pipe raises inside this try block rather than
        # at interpreter shutdown, where it could not be handled.
        sys.stdout.flush()
    except BrokenPipeError:
        # Typically happens if piping to an interactive program like `less` and exiting before reading full output
        # https://docs.python.org/3/library/signal.html#note-on-sigpipe
        # Redirect remaining output to devnull to avoid another BrokenPipeError at shutdown.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        sys.exit(1)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment