lbmaian · October 4, 2025 02:00
diff --git a/reencode_watchmarker_db.py b/reencode_watchmarker_db.py
 #!/usr/bin/env python3
 import argparse
 import sys
 import os
 import base64
 import json
 import inspect

 # Command-line interface: one or more input files plus the desired output format.
 # RawDescriptionHelpFormatter prints the description/epilog as written, so the
 # indented key lists and example commands below keep their layout in --help.
 usage_epilog = inspect.cleandoc(
        '''
        File format: plain JSON (typically .json file), or base64-encoded UTF-8 JSON (typically .database file),
        containing an array of objects, each representing a watched video with keys:
            strIdent, intTimestamp, strTitle, intCount
        
        If multiple files are specified, their decoded JSON arrays are concatenated, then reencoded into base64.
        If objects with the same strIdent exist across multiple files, they're merged into the earliest conflicting object such that:
            strTitle: unchanged (effectively preferring the object in the earliest specified file)
            intTimestamp: lowest value
            intCount: highest value

        Example usage:
        Reads plain JSON database (.json) and output in base64-encoded JSON:
            %(prog)s youtube-watchmarker-2025-09-29.database.json
        Reads base64-encoded JSON (.database) and output in pretty-printed JSON:
            %(prog)s --format json watchmarker.2025.09.24.database
        Merge base64-encoded JSON database with plain JSON database and output in base64-encoded JSON:
            %(prog)s watchmarker.2025.09.24.database youtube-watchmarker-2025-09-29.database.json
        Same as above but output in pretty-printed JSON:
            %(prog)s --format json watchmarker.2025.09.24.database youtube-watchmarker-2025-09-29.database.json
        ''')
 argparser = argparse.ArgumentParser(
    description='Encodes/decodes and merges one or more watchmarker database/JSON files depending on --format',
    epilog=usage_epilog,
    formatter_class=argparse.RawDescriptionHelpFormatter,
 )
 # Output format selector; defaults to the base64 ".database" form.
 argparser.add_argument(
    '--format',
    default='database',
    choices=['json', 'database'],
    help='if "json", outputs pretty-printed JSON; if "database" (default), outputs base64-encoded UTF-8 JSON',
 )
 # At least one input file is required (nargs='+').
 argparser.add_argument(
    'filename',
    nargs='+',
    help='JSON file or base64-encoded UTF-8 JSON file (multiple can be specified)',
 )
 # Parsed at module level: this is a flat script, and `args` is read by the code that follows.
 args = argparser.parse_args()

 # Note on watchmarker's DB codec JS implementation:
 # encoding: btoa(unescape(encodeURIComponent(JSON.stringify(...))))
 # decoding: JSON.parse(decodeURIComponent(escape(atob(...))))
 # The unescape(encodeURIComponent(...)) and decodeURIComponent(escape(...)) are a workaround
 # for btoa/atob's inability to handle Unicode code points whose UTF-8 encoding is longer than one byte.
 # The workaround effectively encodes the original string as individual UTF-8 bytes, while decoding reverses this.
 # Python's base64 works with byte strings directly and so doesn't need such workarounds.

 # Load every input file and merge its entries into one dict keyed by strIdent.
 # For a repeated strIdent the first-seen entry is kept (so its strTitle wins) and
 # updated in place with the lowest intTimestamp and the highest intCount.
 # Dicts preserve insertion order (Python 3.7+), so output order is first-seen order.
 merged_entries_dict = {}
 for filename in args.filename:
    # Read the whole file up front so the handle is closed before any parsing/merging.
    with open(filename, 'rb') as f:
        raw_data = f.read()
    # A plain JSON array starts with '[', which is not a base64 character, so the two
    # formats cannot be confused: try plain JSON first, then fall back to base64-encoded JSON.
    try:
        # json.loads accepts UTF-8 byte strings directly, so no decode('utf-8') is needed
        entries = json.loads(raw_data)
    except json.JSONDecodeError:
        entries = json.loads(base64.b64decode(raw_data))
    for entry in entries:
        ident = entry['strIdent']
        existing_entry = merged_entries_dict.get(ident)
        if existing_entry is None:
            # First occurrence of this video: keep the object as-is.
            merged_entries_dict[ident] = entry
            continue
        # Duplicate: earliest watch time and largest watch count win; strTitle is left untouched.
        existing_entry['intTimestamp'] = min(existing_entry['intTimestamp'], entry['intTimestamp'])
        existing_entry['intCount'] = max(existing_entry['intCount'], entry['intCount'])
 merged_entries = list(merged_entries_dict.values())

 # Emit the merged entries on stdout in the requested format:
 #   json     -> tab-indented, human-readable JSON followed by a newline
 #   database -> base64 of the compact UTF-8 JSON, with no trailing newline
 try:
    if args.format == 'json':
        print(json.dumps(merged_entries, ensure_ascii=False, indent='\t'))
    else:
        merged_json = json.dumps(merged_entries, ensure_ascii=False)
        sys.stdout.write(base64.b64encode(merged_json.encode('utf-8')).decode('ascii'))
    # Flush while still inside the try block: stdout is buffered, so without this the
    # final chunk is only written at interpreter shutdown, where a closed pipe raises
    # BrokenPipeError outside this handler.
    sys.stdout.flush()
 except BrokenPipeError:
    # Typically happens if piping to an interactive program like `less` and exiting before reading full output
    # https://docs.python.org/3/library/signal.html#note-on-sigpipe
    # Python flushes the standard streams again on exit; point stdout at devnull so that
    # flush cannot raise a second BrokenPipeError.
    devnull = os.open(os.devnull, os.O_WRONLY)
    os.dup2(devnull, sys.stdout.fileno())
    sys.exit(1)
	#!/usr/bin/env python3
	import argparse
	import sys
	import os
	import base64
	import json
	import inspect

	# Command-line interface: one or more input files plus the desired output format.
	# RawDescriptionHelpFormatter prints the description/epilog as written, so the
	# indented key lists and example commands in the epilog keep their layout in --help.
	argparser = argparse.ArgumentParser(
	    formatter_class=argparse.RawDescriptionHelpFormatter,
	    description='Encodes/decodes and merges one or more watchmarker database/JSON files depending on --format',
	    epilog=inspect.cleandoc(
	        '''
	        File format: plain JSON (typically .json file), or base64-encoded UTF-8 JSON (typically .database file),
	        containing an array of objects, each representing a watched video with keys:
	            strIdent, intTimestamp, strTitle, intCount

	        If multiple files are specified, their decoded JSON arrays are concatenated, then reencoded into base64.
	        If objects with the same strIdent exist across multiple files, they're merged into the earliest conflicting object such that:
	            strTitle: unchanged (effectively preferring the object in the earliest specified file)
	            intTimestamp: lowest value
	            intCount: highest value

	        Example usage:
	        Reads plain JSON database (.json) and output in base64-encoded JSON:
	            %(prog)s youtube-watchmarker-2025-09-29.database.json
	        Reads base64-encoded JSON (.database) and output in pretty-printed JSON:
	            %(prog)s --format json watchmarker.2025.09.24.database
	        Merge base64-encoded JSON database with plain JSON database and output in base64-encoded JSON:
	            %(prog)s watchmarker.2025.09.24.database youtube-watchmarker-2025-09-29.database.json
	        Same as above but output in pretty-printed JSON:
	            %(prog)s --format json watchmarker.2025.09.24.database youtube-watchmarker-2025-09-29.database.json
	        '''),
	)
	# Output format selector; defaults to the base64 ".database" form.
	argparser.add_argument(
	    '--format', choices=['json', 'database'], default='database',
	    help='if "json", outputs pretty-printed JSON; if "database" (default), outputs base64-encoded UTF-8 JSON',
	)
	# At least one input file is required (nargs='+').
	argparser.add_argument(
	    'filename', nargs='+',
	    help='JSON file or base64-encoded UTF-8 JSON file (multiple can be specified)',
	)
	# Parsed at module level: this is a flat script, and `args` is read by the code that follows.
	args = argparser.parse_args()

	# Note on watchmarker's DB codec JS implementation:
	# encoding: btoa(unescape(encodeURIComponent(JSON.stringify(...))))
	# decoding: JSON.parse(decodeURIComponent(escape(atob(...))))
	# The unescape(encodeURIComponent(...)) and decodeURIComponent(escape(...)) are a workaround
	# for btoa/atob's inability to handle Unicode code points whose UTF-8 encoding is longer than one byte.
	# The workaround effectively encodes the original string as individual UTF-8 bytes, while decoding reverses this.
	# Python's base64 works with byte strings directly and so doesn't need such workarounds.

	# Load every input file and merge its entries into one dict keyed by strIdent.
	# For a repeated strIdent the first-seen entry is kept (so its strTitle wins) and
	# updated in place with the lowest intTimestamp and the highest intCount.
	# Dicts preserve insertion order (Python 3.7+), so output order is first-seen order.
	merged_entries_dict = {}
	for filename in args.filename:
	    # Read the whole file up front so the handle is closed before any parsing/merging.
	    with open(filename, 'rb') as f:
	        raw_data = f.read()
	    # A plain JSON array starts with '[', which is not a base64 character, so the two
	    # formats cannot be confused: try plain JSON first, then fall back to base64-encoded JSON.
	    try:
	        # json.loads accepts UTF-8 byte strings directly, so no decode('utf-8') is needed
	        entries = json.loads(raw_data)
	    except json.JSONDecodeError:
	        entries = json.loads(base64.b64decode(raw_data))
	    for entry in entries:
	        ident = entry['strIdent']
	        existing_entry = merged_entries_dict.get(ident)
	        if existing_entry is None:
	            # First occurrence of this video: keep the object as-is.
	            merged_entries_dict[ident] = entry
	            continue
	        # Duplicate: earliest watch time and largest watch count win; strTitle is left untouched.
	        existing_entry['intTimestamp'] = min(existing_entry['intTimestamp'], entry['intTimestamp'])
	        existing_entry['intCount'] = max(existing_entry['intCount'], entry['intCount'])
	merged_entries = list(merged_entries_dict.values())

	# Emit the merged entries on stdout in the requested format:
	#   json     -> tab-indented, human-readable JSON followed by a newline
	#   database -> base64 of the compact UTF-8 JSON, with no trailing newline
	try:
	    if args.format == 'json':
	        print(json.dumps(merged_entries, ensure_ascii=False, indent='\t'))
	    else:
	        merged_json = json.dumps(merged_entries, ensure_ascii=False)
	        sys.stdout.write(base64.b64encode(merged_json.encode('utf-8')).decode('ascii'))
	    # Flush while still inside the try block: stdout is buffered, so without this the
	    # final chunk is only written at interpreter shutdown, where a closed pipe raises
	    # BrokenPipeError outside this handler.
	    sys.stdout.flush()
	except BrokenPipeError:
	    # Typically happens if piping to an interactive program like `less` and exiting before reading full output
	    # https://docs.python.org/3/library/signal.html#note-on-sigpipe
	    # Python flushes the standard streams again on exit; point stdout at devnull so that
	    # flush cannot raise a second BrokenPipeError.
	    devnull = os.open(os.devnull, os.O_WRONLY)
	    os.dup2(devnull, sys.stdout.fileno())
	    sys.exit(1)
No results found