Skip to content

Instantly share code, notes, and snippets.

@getjump
Created January 25, 2026 23:39
Show Gist options
  • Select an option

  • Save getjump/d71891ebec8254c6260cd6eea3b46bd5 to your computer and use it in GitHub Desktop.

Select an option

Save getjump/d71891ebec8254c6260cd6eea3b46bd5 to your computer and use it in GitHub Desktop.
Session store sanitizer (privacy-respecting)
import argparse
import json
import os
import re
import hashlib
# -----------------------
# Settings / patterns
# -----------------------

# Placeholder text written wherever a value is redacted.
REDACTED = "REDACTED"

# Lower-cased key names whose entries are REMOVED from the output entirely
# (dropped, not replaced with REDACTED).
DROP_KEYS_LOWER = {"structuredclonestate"}

# Key names whose value is replaced with REDACTED. The alternatives are
# unanchored, so with search() they also hit longer names that merely contain
# them (e.g. "sid", "key").
SENSITIVE_KEY_RE = re.compile(
    r"(pass(word)?|pwd|secret|token|jwt|bearer|auth(orization)?|session|sid|sess|csrf|xsrf|cookie|"
    r"localstorage|sessionstorage|indexeddb|storage|formdata|postdata|"
    r"credit|card|iban|ssn|email|phone|address|"
    r"device(id)?|client(id)?|telemetry|fingerprint|api[_-]?key|private|key|signature)",
    flags=re.IGNORECASE,
)

# Human-readable titles/labels and similar keys, which often carry private text.
# Only "title" must match the whole key; "name" must be at the end of the key.
TITLE_KEY_RE = re.compile(
    r"(^title$|tabtitle|pagetitle|entrytitle|label|caption|headline|subject|name$)",
    flags=re.IGNORECASE,
)

# Values that look like a token/credential; used to redact a value even when
# its key looks innocent.
SENSITIVE_VALUE_RE = re.compile(
    r"(eyJ[a-zA-Z0-9_\-]+=*\.[a-zA-Z0-9_\-]+=*\.[a-zA-Z0-9_\-]+=*)"  # JWT-ish
    r"|(\bBearer\s+[A-Za-z0-9\-_\.=]+\b)"  # "Bearer <credential>"
    r"|(\b[A-Fa-f0-9]{32,}\b)"  # long hex run (32+ digits)
    r"|(\b[A-Za-z0-9_\-]{40,}\b)",  # long random token-ish run (40+ chars)
    flags=re.IGNORECASE,
)

# URL detection for http(s):// and file:// only.
# EMBED finds a URL anywhere inside a longer string; WHOLE only checks that the
# string starts with one of the schemes.
URL_EMBED_RE = re.compile(r"(https?|file)://[^\s\"\'<>]+", flags=re.IGNORECASE)
URL_WHOLE_RE = re.compile(r"^(https?|file)://", flags=re.IGNORECASE)
def url_id(u: str, n: int = 12) -> str:
    """Return the first *n* hex characters of the SHA-256 digest of *u*.

    The same input always yields the same id, so equal URLs stay recognisable
    as equal without the URL text itself appearing in the output. Characters
    that cannot be encoded as UTF-8 are skipped before hashing.
    """
    hasher = hashlib.sha256()
    hasher.update(u.encode("utf-8", errors="ignore"))
    hex_digest = hasher.hexdigest()
    return hex_digest[:n]
def sanitize_url_opaque(url: str) -> str:
    """
    Build an opaque stand-in for *url*.

    The result has the form ``url:<REDACTED>:<id>``, where ``<id>`` is the
    short hash from ``url_id``; no domain, path, query, or fragment text from
    the original URL is included.
    """
    short_id = url_id(url)
    return f"url:{REDACTED}:{short_id}"
def sanitize_string(s: str) -> str:
    """Sanitize a single string value.

    Rules, applied in order:
      1. If the whitespace-stripped string starts with a URL scheme recognised
         by URL_WHOLE_RE, the entire stripped string is replaced by a single
         opaque URL placeholder.
      2. Otherwise every URL embedded in the string is replaced by its own
         opaque placeholder.
      3. If the result still contains token-like content (SENSITIVE_VALUE_RE),
         the whole string becomes REDACTED; otherwise the result of step 2 is
         returned.
    """
    trimmed = s.strip()
    if URL_WHOLE_RE.match(trimmed) is not None:
        return sanitize_url_opaque(trimmed)

    # Substitution runs on the original (unstripped) string, so surrounding
    # whitespace is preserved in this branch.
    without_urls = URL_EMBED_RE.sub(
        lambda found: sanitize_url_opaque(found.group(0)),
        s,
    )

    looks_like_credential = SENSITIVE_VALUE_RE.search(without_urls) is not None
    return REDACTED if looks_like_credential else without_urls
def sanitize(obj, *, keep_keys: set[str]):
    """
    Return a sanitized copy of a JSON-like value.

    - str: passed through sanitize_string (URLs -> opaque; token-ish -> REDACTED)
    - list: each element sanitized recursively
    - dict: each entry handled by the first rule that applies to its key
        1. key (as a string, exact match) is in keep_keys -> entry kept, value
           still sanitized recursively
        2. lower-cased key is in DROP_KEYS_LOWER -> entry left out entirely
        3. key matches TITLE_KEY_RE or SENSITIVE_KEY_RE -> value becomes REDACTED
        4. anything else -> value sanitized recursively
      Key names themselves are copied to the output unchanged.
    - any other value (numbers / bool / None): returned as-is
    """
    if isinstance(obj, str):
        return sanitize_string(obj)

    if isinstance(obj, list):
        return [sanitize(item, keep_keys=keep_keys) for item in obj]

    if not isinstance(obj, dict):
        return obj

    cleaned = {}
    for key, value in obj.items():
        name = str(key)
        # Forced-keep keys bypass the drop/redact rules below (use carefully).
        if name not in keep_keys:
            if name.lower() in DROP_KEYS_LOWER:
                continue
            if TITLE_KEY_RE.search(name) or SENSITIVE_KEY_RE.search(name):
                cleaned[key] = REDACTED
                continue
        cleaned[key] = sanitize(value, keep_keys=keep_keys)
    return cleaned
def main():
    """Command-line entry point: load a JSON file, sanitize it, write the result.

    Command-line arguments (parsed with argparse):
        input: path of the JSON file to read (decoded as UTF-8).
        -o / --output: path of the sanitized JSON file to write (required);
            missing parent directories are created first.
        --keep-key: key name forwarded to ``sanitize`` through ``keep_keys``;
            may be given several times.
        --pretty: write the JSON indented by two spaces instead of compactly.

    Prints a one-line confirmation naming the output path when done.
    """
    ap = argparse.ArgumentParser(
        description="Sanitize sessionstore-like JSON: drop structuredCloneState, redact titles/tokens/cookies/storage, and remove domains from URLs."
    )
    ap.add_argument("input", help="Input JSON file")
    ap.add_argument("-o", "--output", required=True, help="Output sanitized JSON file")
    ap.add_argument(
        "--keep-key",
        action="append",
        default=[],
        help="Key name to keep (never drop/redact), can be repeated",
    )
    ap.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
    args = ap.parse_args()

    with open(args.input, "r", encoding="utf-8") as f:
        data = json.load(f)

    sanitized = sanitize(data, keep_keys=set(args.keep_key))

    # abspath() guarantees a non-empty directory part even for a bare file name.
    os.makedirs(os.path.dirname(os.path.abspath(args.output)), exist_ok=True)

    # json.load accepts lone surrogate escapes (e.g. "\ud83d"), but with
    # ensure_ascii=False such a code point cannot be encoded as UTF-8 and the
    # write would fail with UnicodeEncodeError part-way through the file.
    # "backslashreplace" emits it as \udXXX instead, which is a valid JSON
    # string escape, so the output stays loadable.
    with open(args.output, "w", encoding="utf-8", errors="backslashreplace") as f:
        if args.pretty:
            json.dump(sanitized, f, ensure_ascii=False, indent=2)
        else:
            json.dump(sanitized, f, ensure_ascii=False, separators=(",", ":"))

    print(f"Sanitized -> {args.output}")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment