Skip to content

Instantly share code, notes, and snippets.

@DavidBuchanan314
Created July 25, 2025 14:55
Show Gist options
  • Save DavidBuchanan314/39fa9334e3d182454691d5429a7f199c to your computer and use it in GitHub Desktop.
Save DavidBuchanan314/39fa9334e3d182454691d5429a7f199c to your computer and use it in GitHub Desktop.
"""
Iterate through the /export jsonlines, shove them into ~~sqlite~~ rocksdb, then pull them
out again into a new jsonlines file where each line is in /log/audit format for one DID.
Expects input data in "out.jsonlines"; writes the result to "plc_audit_log.jsonlines".
"""
from tqdm import tqdm
import json
import aimrocks
import os
import shutil
DB_PATH = "plc.db"

# Start from an empty database so entries left over from an earlier run
# cannot end up in this run's output.
if os.path.exists(DB_PATH):
    shutil.rmtree(DB_PATH)

db = aimrocks.DB(DB_PATH, aimrocks.Options(create_if_missing=True), read_only=False)

# Load every exported operation into the database.
#
# Key layout: <DID without the "did:plc:" prefix><createdAt>, value: the raw
# JSON line. RocksDB iterates keys in sorted byte order, so reading the
# database back yields each DID's operations next to each other, ordered by
# their createdAt string.
# NOTE(review): two operations with the same DID and createdAt would share a
# key and the later one would overwrite the earlier - confirm this cannot
# happen in the export.
#
# The file is opened explicitly as UTF-8 (the values are re-encoded as UTF-8
# below, so relying on the locale encoding could corrupt non-ASCII data) and
# inside a `with` block so the handle is closed once ingestion finishes.
with open("out.jsonlines", encoding="utf-8") as infile:
    for line in tqdm(infile):
        line = line.rstrip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of crashing in json.loads.
            continue
        log = json.loads(line)
        key = log["did"].removeprefix("did:plc:") + log["createdAt"]
        db.put(key.encode(), line.encode())
# Number of leading key bytes that identify the DID. Keys are built by the
# db.put call as <DID without "did:plc:"><createdAt>.
# NOTE(review): assumes every did:plc identifier is exactly 24 characters
# after the prefix - confirm against the did:plc specification.
DID_KEY_LEN = 24

# Walk the database in key order and emit one JSON array per DID, one array
# per output line. Values are already serialized JSON objects, so they are
# joined with commas as raw bytes rather than being parsed and re-dumped.
with open("plc_audit_log.jsonlines", "wb") as outfile:
    it = db.iteritems()
    it.seek_to_first()
    prev_did = None
    ops = []
    for k, v in tqdm(it):
        did = k[:DID_KEY_LEN]
        # A change of DID prefix means the previous DID's log is complete.
        if prev_did is not None and did != prev_did:
            outfile.write(b"[" + b",".join(ops) + b"]\n")
            ops = []
        ops.append(v)
        prev_did = did
    # Flush the final DID. Skipped when the database was empty so that no
    # spurious "[]" line is written.
    if ops:
        outfile.write(b"[" + b",".join(ops) + b"]\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment