Skip to content

Instantly share code, notes, and snippets.

@lbmaian
Last active October 4, 2025 02:00
Show Gist options
  • Save lbmaian/24f0dd0bb5d09c51ba46c050f1971e7d to your computer and use it in GitHub Desktop.
Save lbmaian/24f0dd0bb5d09c51ba46c050f1971e7d to your computer and use it in GitHub Desktop.
Encodes/decodes and merges one or more watchmarker database/JSON files depending on --format
#!/usr/bin/env python3
import argparse
import sys
import os
import base64
import json
import inspect
# Help epilog. The text is kept flush-left on purpose: inspect.cleandoc() only
# trims the surrounding blank lines and any indentation common to all lines,
# and RawDescriptionHelpFormatter then prints the result as-is.
_EPILOG = inspect.cleandoc(
'''
File format: plain JSON (typically .json file), or base64-encoded UTF-8 JSON (typically .database file),
containing an array of objects, each representing a watched video with keys:
strIdent, intTimestamp, strTitle, intCount
If multiple files are specified, their decoded JSON arrays are concatenated, then reencoded into base64.
If objects with the same strIdent exist across multiple files, they're merged into the earliest conflicting object such that:
strTitle: unchanged (effectively preferring the object in the earliest specified file)
intTimestamp: lowest value
intCount: highest value
Example usage:
Reads plain JSON database (.json) and output in base64-encoded JSON:
%(prog)s youtube-watchmarker-2025-09-29.database.json
Reads base64-encoded JSON (.database) and output in pretty-printed JSON:
%(prog)s --format json watchmarker.2025.09.24.database
Merge base64-encoded JSON database with plain JSON database and output in base64-encoded JSON:
%(prog)s watchmarker.2025.09.24.database youtube-watchmarker-2025-09-29.database.json
Same as above but output in pretty-printed JSON:
%(prog)s --format json watchmarker.2025.09.24.database youtube-watchmarker-2025-09-29.database.json
''')


def build_argparser():
    """Return the argparse parser describing this script's command line."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Encodes/decodes and merges one or more watchmarker database/JSON files depending on --format',
        epilog=_EPILOG)
    parser.add_argument(
        '--format', choices=['json', 'database'], default='database',
        help='if "json", outputs pretty-printed JSON; if "database" (default), outputs base64-encoded UTF-8 JSON')
    parser.add_argument(
        'filename', nargs='+',
        help='JSON file or base64-encoded UTF-8 JSON file (multiple can be specified)')
    return parser


# Note on watchmarker's DB codec JS implementation:
# encoding: btoa(unescape(encodeURIComponent(JSON.stringify(...))))
# decoding: JSON.parse(decodeURIComponent(escape(atob(...))))
# The unescape(encodeURIComponent(...)) and decodeURIComponent(escape(...)) are a workaround
# for btoa/atob's inability to handle unicode code points whose UTF-8 encoding is larger than a byte.
# The workaround effectively encodes the original string as individual UTF-8 bytes, while decoding reverses this.
# Python's base64 works with byte strings directly and so doesn't need such workarounds.


def load_entries(filename):
    """Read *filename* and return the JSON value it contains.

    The file is first parsed as plain JSON; if that fails it is
    base64-decoded and the result is parsed as JSON instead.

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if the content is neither JSON nor base64-encoded JSON;
            the message names the offending file.
    """
    with open(filename, 'rb') as f:
        raw = f.read()
    try:
        # Fortunately, database JSON files and base64-encoded JSON files have no possible overlap,
        # so can just try parsing it as JSON first, then as base64-encoded JSON.
        try:
            # json.loads can handle UTF-8 byte strings so no need for decode('utf-8')
            return json.loads(raw)
        except json.JSONDecodeError:
            return json.loads(base64.b64decode(raw))
    except ValueError as err:
        # binascii.Error, json.JSONDecodeError and UnicodeDecodeError are all
        # ValueError subclasses; re-raise with the filename for context.
        raise ValueError(f'{filename}: not valid JSON or base64-encoded JSON ({err})') from err


def merge_entries(entry_lists):
    """Merge several lists of entry dicts into one list keyed by strIdent.

    Entries keep the order in which each strIdent is first seen. When a
    strIdent appears again, the first-seen entry is kept and only its
    intTimestamp (lowered to the minimum) and intCount (raised to the
    maximum) are updated; every other key, e.g. strTitle, is left unchanged.

    The input dicts are not modified: the first occurrence is shallow-copied
    before any update is applied to it.
    """
    merged = {}
    for entries in entry_lists:
        for entry in entries:
            ident = entry['strIdent']
            kept = merged.get(ident)
            if kept is None:
                merged[ident] = dict(entry)
            else:
                kept['intTimestamp'] = min(kept['intTimestamp'], entry['intTimestamp'])
                kept['intCount'] = max(kept['intCount'], entry['intCount'])
    return list(merged.values())


def encode_entries(entries, fmt):
    """Serialize *entries* to the text written to stdout.

    With fmt == 'json' the result is tab-indented JSON followed by a newline;
    otherwise it is the base64 encoding of the compact UTF-8 JSON, with no
    trailing newline.
    """
    if fmt == 'json':
        return json.dumps(entries, ensure_ascii=False, indent='\t') + '\n'
    merged_json = json.dumps(entries, ensure_ascii=False)
    return base64.b64encode(merged_json.encode('utf-8')).decode('ascii')


def main(argv=None):
    """Parse *argv* (default: sys.argv[1:]), merge the files, write to stdout.

    Exits with status 2 via argparse if a file cannot be decoded, and with
    status 1 if the reader of stdout closes the pipe early.
    """
    parser = build_argparser()
    args = parser.parse_args(argv)
    try:
        merged_entries = merge_entries(load_entries(name) for name in args.filename)
    except ValueError as err:
        parser.error(str(err))
    try:
        sys.stdout.write(encode_entries(merged_entries, args.format))
        # Flush here so that a closed pipe raises inside this try block rather
        # than during the implicit flush at interpreter shutdown.
        sys.stdout.flush()
    except BrokenPipeError:
        # Typically happens if piping to an interactive program like `less` and exiting before reading full output
        # https://docs.python.org/3/library/signal.html#note-on-sigpipe
        # Redirect stdout to devnull so the flush at exit cannot fail again.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        sys.exit(1)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment