krasserm · May 28, 2026 05:07
diff --git a/castedit.py b/castedit.py
 #!/usr/bin/env python3
 """Edit asciicast v3 recordings: report a compact timeline, then apply timing edits.

 asciicast v3 stores each event as a JSON array ``[interval, code, data]`` where
 ``interval`` is *seconds since the previous event* (relative timing). Because the
 timeline is relative, trimming idle gaps and speeding up regions are just
 arithmetic on the ``interval`` field, with no downstream timestamps to repair.
 The header line and every event's ``data`` payload are passed through verbatim.

 This script is the engine for the cast-edit skill. It has three modes:

  analyze   read the whole cast, print a small summary (duration, idle gaps,
            detected typing runs with decoded snippets). This is the interface
            the model reasons over, so the full cast never enters context.
  show      dump the raw events in a time window, for inspecting one region.
  edit      apply a JSON plan (idle cap, speed regions, cuts) and write a new cast.

 Timecodes everywhere refer to the ORIGINAL timeline (cumulative seconds in the
 source cast), so a region found by ``analyze`` can be fed straight back to ``edit``.
 """
 import argparse
 import json
 import re
 import sys

 # --- ANSI / control stripping, for reconstructing what was typed -------------
 # visible() applies these to non-input payloads in the order OSC, CSI, ESC, so
 # the generic ESC pattern only sees what the two specific patterns left behind.
 # OSC: ESC ] followed by any bytes up to a BEL or ST (ESC \) terminator.
 _OSC = re.compile(r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)")   # OSC ... BEL/ST
 # CSI: ESC [ then parameter bytes (0-?), intermediate bytes (space-/), one final byte (@-~).
 _CSI = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")             # CSI ... final byte
 # Remaining escapes: ESC, optional intermediate bytes (space-/), then one byte in 0-~.
 _ESC = re.compile(r"\x1b[ -/]*[0-~]")                     # other 2-char escapes


 def visible(data, code):
    """Approximate the plain text that one event's payload put on screen.

    Input events (``code == "i"``) are taken as-is; for every other code the
    OSC, CSI and remaining ESC sequences are removed first. A backspace then
    erases the previously kept character, and all other control characters
    (CR, LF, TAB, DEL, ...) are dropped. Terminal redraws such as shell
    autosuggestions cannot be replayed this way, so treat the result as a
    hint rather than ground truth.
    """
    text = data
    if code != "i":
        for pattern in (_OSC, _CSI, _ESC):
            text = pattern.sub("", text)
    kept = []
    for char in text:
        if char == "\b":
            # a backspace removes the last kept character, if there is one
            if kept:
                kept.pop()
        elif char >= " " and char != "\x7f":
            # printable: everything below space (incl. \r \n \t) and DEL is skipped
            kept.append(char)
    return "".join(kept)


 # --- loading -----------------------------------------------------------------
 def load(path):
    """Parse an asciicast v3 file into its header and an ordered list of rows.

    Returns ``(header, rows)``. ``rows`` keeps file order and holds
    ``(kind, payload)`` pairs: ``'h'`` for the header (the parsed JSON value),
    ``'c'`` for blank lines and ``#`` comments (the raw line), and ``'e'`` for
    events (the parsed ``[interval, code, data]`` array).

    Exits via ``sys.exit`` when the file is empty or the header is not a
    version-3 header. Invalid JSON on any line raises ``json.JSONDecodeError``.
    """
    # NOTE(review): the file is read with the platform default encoding; the
    # writer in cmd_edit does the same, so the two currently agree.
    with open(path) as f:
        # Split on "\n" only. str.splitlines() also breaks on U+0085, U+2028
        # and U+2029, which JSON allows unescaped inside a string, so it would
        # cut such an event line in two and make json.loads() fail.
        lines = f.read().split("\n")
    if lines and lines[-1] == "":
        lines.pop()                        # the piece after the final newline
    if not lines:
        sys.exit("empty file")
    header = json.loads(lines[0])
    # A header that is not a JSON object has no version at all.
    version = header.get("version") if isinstance(header, dict) else None
    if version != 3:
        sys.exit(f"not an asciicast v3 file (version={version!r}); "
                 f"convert first: asciinema convert in out (writes v3 by default)")
    rows = [("h", header)]
    for line in lines[1:]:
        if not line or line.startswith("#"):
            rows.append(("c", line))
        else:
            rows.append(("e", json.loads(line)))
    return header, rows


 def event_times(rows):
    """Map each event row index to the cumulative time at which it fires.

    Returns ``(times, total)``: ``times`` is keyed by the index of every
    ``'e'`` row in *rows* and holds the running sum of intervals up to and
    including that event; ``total`` is the final running sum (0.0 if there
    are no events).
    """
    fired = {}
    elapsed = 0.0
    for index, row in enumerate(rows):
        kind, payload = row
        if kind != "e":
            continue
        elapsed += payload[0]
        fired[index] = elapsed
    return fired, elapsed


 # --- analyze -----------------------------------------------------------------
 def find_idle(rows, times, threshold):
    """List every event that was preceded by a pause longer than *threshold*.

    Returns one tuple per such event, in file order:
    ``(row_index, pause_start, pause_length, preview)``. ``pause_start`` is
    the event's timecode from *times* minus its own interval, and ``preview``
    is the first 32 characters ``visible`` extracts from the event itself.
    """
    found = []
    for index, (kind, event) in enumerate(rows):
        if kind != "e" or not event[0] > threshold:
            continue
        pause = event[0]
        started = times[index] - pause
        preview = visible(event[2], event[1])[:32]
        found.append((index, started, pause, preview))
    return found


 def find_typing(rows, times, min_gap, max_gap, min_events):
    """Find maximal runs of consecutive events spaced at keystroke cadence.

    An event is "paced" when ``min_gap <= interval <= max_gap``. A run is a
    stretch of consecutive paced events, seeded with the event just before
    the first paced one. Runs shorter than *min_events* (seed included) are
    dropped.

    Cadence is the signal on purpose: output bursts have ~0 gaps, idle is one
    big gap, typing is a sustained series of small gaps. Payload size is not
    used as a filter because autosuggestions inflate it.

    Returns a list of ``(start, end, duration, n_events, text)`` where the
    times come from *times* and ``text`` is the whitespace-collapsed,
    48-character-capped concatenation of ``visible`` over the run.
    """
    ev_idx = [i for i, (k, _) in enumerate(rows) if k == "e"]
    runs, run = [], []
    for pos, i in enumerate(ev_idx):
        if min_gap <= rows[i][1][0] <= max_gap:
            if not run and pos:
                # Seed with the preceding event. The very first event has no
                # predecessor; it must not be seeded with itself, or it would
                # be counted (and its text shown) twice.
                run.append(ev_idx[pos - 1])
            run.append(i)
            continue
        if len(run) >= min_events:
            runs.append(run)
        run = []
    if len(run) >= min_events:             # a run still open at end of cast
        runs.append(run)
    out = []
    for run in runs:
        start, end = times[run[0]], times[run[-1]]
        text = "".join(visible(rows[i][1][2], rows[i][1][1]) for i in run)
        text = re.sub(r"\s+", " ", text).strip()[:48]
        out.append((start, end, end - start, len(run), text))
    return out


 def cmd_analyze(args):
    """Print the compact report for one cast: totals, idle gaps, typing runs.

    Reads ``args.infile`` and uses ``args.idle_threshold``,
    ``args.type_min_gap``, ``args.type_max_gap``, ``args.min_keystrokes`` and
    ``args.limit``. Idle gaps are listed longest first, typing runs in file
    order; both lists are cut to ``args.limit`` entries (``None`` = all).
    """
    header, rows = load(args.infile)
    times, total = event_times(rows)
    term = header.get("term", {})
    n_events = sum(kind == "e" for kind, _ in rows)
    cap = args.idle_threshold

    print(f"cast: {args.infile}")
    print(f"version: 3 | term: {term.get('cols')}x{term.get('rows')} | "
          f"events: {n_events} | duration: {total:.3f}s")

    gaps = find_idle(rows, times, cap)
    print(f"\nIDLE GAPS (interval > {cap}s) -- candidates to trim/cap")
    if not gaps:
        print("  (none)")
    else:
        # time saved if every listed gap were shortened to exactly the threshold
        reclaim = sum(gap[2] - cap for gap in gaps)
        longest_first = sorted(gaps, key=lambda gap: gap[2], reverse=True)
        print(f"  {'begins(s)':>9}  {'gap(s)':>7}  next-output")
        for _, begins, length, after in longest_first[:args.limit]:
            print(f"  {begins:9.3f}  {length:7.3f}  {after!r}")
        print(f"  -> capping at {cap}s reclaims {reclaim:.3f}s "
              f"(new duration ~{total - reclaim:.3f}s)")

    lo, hi = args.type_min_gap, args.type_max_gap
    runs = find_typing(rows, times, lo, hi, args.min_keystrokes)
    print(f"\nTYPING RUNS (cadence {lo}-{hi}s, "
          f">={args.min_keystrokes} events) -- candidates to speed up")
    if not runs:
        print("  (none)")
    else:
        print(f"  {'start(s)':>8}  {'end(s)':>7}  {'dur(s)':>6}  {'evts':>4}  text~")
        for start, end, dur, count, text in runs[:args.limit]:
            print(f"  {start:8.3f}  {end:7.3f}  {dur:6.3f}  {count:4d}  {text!r}")
    print("\nNext: build a plan (see SKILL.md) and run: castedit.py edit IN OUT --plan plan.json")


 # --- show --------------------------------------------------------------------
 def cmd_show(args):
    """Print the raw events whose timecode lies in ``[args.start, args.end]``.

    Each line shows the event's cumulative timecode, its code, and its data
    as a JSON string. Both window bounds are inclusive.
    """
    _, rows = load(args.infile)
    times, _ = event_times(rows)
    first, last = args.start, args.end
    for index, (kind, event) in enumerate(rows):
        if kind == "e" and first <= times[index] <= last:
            print(f"{times[index]:8.3f}  {event[1]}  {json.dumps(event[2])}")


 # --- edit --------------------------------------------------------------------
 def in_regions(t, regions):
    """Return the first region whose inclusive ``start``..``end`` span holds *t*.

    *regions* is searched in order; ``None`` is returned when nothing matches.
    """
    return next((r for r in regions if r["start"] <= t <= r["end"]), None)


 def _plan_regions(plan, key, need_factor=False):
    """Return ``plan[key]`` (default ``[]``), exiting if an entry is unusable.

    Every region must be an object with ``start`` and ``end``. With
    *need_factor* it must also carry a ``factor`` greater than zero, because
    event intervals are divided by it.
    """
    regions = plan.get(key, [])
    if not isinstance(regions, list):
        sys.exit(f"plan: {key!r} must be a list of regions")
    for region in regions:
        if not isinstance(region, dict) or "start" not in region or "end" not in region:
            sys.exit(f"plan: every {key!r} entry needs 'start' and 'end': {region!r}")
        if need_factor:
            factor = region.get("factor")
            if (isinstance(factor, bool) or not isinstance(factor, (int, float))
                    or not factor > 0):
                sys.exit(f"plan: 'factor' in {key!r} must be a number > 0: {region!r}")
    return regions


 def cmd_edit(args):
    """Apply a timing plan to ``args.infile`` and write ``args.outfile``.

    The plan comes from the JSON file ``args.plan`` (optional) and may hold
    ``idle_cap``, ``speed_regions``, ``cuts`` and ``resume_gap`` (default
    0.1). ``args.idle_cap``, when given, overrides the plan's ``idle_cap``.
    All region timecodes refer to the original timeline.

    Per event, in order: events inside a cut are dropped; the interval is
    capped at ``idle_cap``; divided by the ``factor`` of the first matching
    speed region; and, for the first event after a cut, capped at
    ``resume_gap``. Intervals are written rounded to milliseconds. Header and
    comment rows are carried over; event code and data are unchanged.

    Exits via ``sys.exit`` on a malformed plan, before anything is written.
    """
    header, rows = load(args.infile)
    times, before = event_times(rows)

    plan = {}
    if args.plan:
        with open(args.plan) as f:
            plan = json.load(f)
        if not isinstance(plan, dict):
            sys.exit("plan: top level must be a JSON object")
    if args.idle_cap is not None:          # CLI override / shortcut
        plan["idle_cap"] = args.idle_cap
    idle_cap = plan.get("idle_cap")
    # Validate up front so a bad region cannot fail halfway through the cast
    # or divide by a zero / negative factor.
    speed_regions = _plan_regions(plan, "speed_regions", need_factor=True)
    cuts = _plan_regions(plan, "cuts")
    resume_gap = plan.get("resume_gap", 0.1)

    out = [json.dumps(header, separators=(",", ":"), ensure_ascii=False)]
    carry = 0.0          # error-diffusion accumulator keeps total drift sub-ms
    just_cut = False
    for i, (kind, payload) in enumerate(rows):
        if kind == "h":
            continue
        if kind == "c":
            out.append(payload)
            continue
        interval, code, data = payload
        t = times[i]

        if in_regions(t, cuts):            # drop this event entirely
            just_cut = True
            continue

        if idle_cap is not None:
            interval = min(interval, idle_cap)
        region = in_regions(t, speed_regions)
        if region:
            interval = interval / region["factor"]
        if just_cut:                       # close the hole left by a cut
            interval = min(interval, resume_gap)
            just_cut = False

        # Round the running remainder rather than each interval on its own, so
        # rounding errors do not accumulate over the cast.
        carry += interval
        rounded = round(carry, 3)
        if rounded <= 0.0:                 # never emit a negative interval or
            rounded = 0.0                  # "-0.0", which the v3 parser rejects
        carry -= rounded
        # ensure_ascii=False keeps non-ASCII glyphs literal (matching asciinema's
        # own output and keeping files compact); control chars are still escaped.
        out.append(json.dumps([rounded, code, data], ensure_ascii=False))

    with open(args.outfile, "w") as f:
        f.write("\n".join(out) + "\n")

    # recompute new duration for the summary
    _, after = event_times(load(args.outfile)[1])
    # A cast with no events (or only zero intervals) has no meaningful ratio.
    share = f"{after / before * 100:.0f}%" if before > 0 else "n/a"
    print(f"wrote {args.outfile}")
    print(f"duration: {before:.3f}s -> {after:.3f}s "
          f"({before - after:+.3f}s, {share} of original)")
    print("validate: asciinema convert "
          f"{args.outfile} /tmp/_v.txt --overwrite   (errors here = invalid cast)")


 def main():
    """Build the ``analyze`` / ``show`` / ``edit`` command line and dispatch.

    Each subcommand stores its handler as ``func``; after parsing, that
    handler is called with the parsed arguments.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    commands = parser.add_subparsers(dest="cmd", required=True)

    analyze = commands.add_parser("analyze", help="print a compact timeline summary")
    analyze.add_argument("infile")
    for flag, default in (("--idle-threshold", 0.5),
                          ("--type-min-gap", 0.04),
                          ("--type-max-gap", 0.5)):
        analyze.add_argument(flag, type=float, default=default)
    analyze.add_argument("--min-keystrokes", type=int, default=4)
    analyze.add_argument(
        "--limit", type=int, default=None,
        help="cap how many gaps/runs to list (default: show all). "
             "Set this only if a very long cast makes the output unwieldy.")
    analyze.set_defaults(func=cmd_analyze)

    show = commands.add_parser("show", help="dump raw events in a time window")
    show.add_argument("infile")
    show.add_argument("--start", type=float, default=0.0)
    show.add_argument("--end", type=float, default=float("inf"))
    show.set_defaults(func=cmd_show)

    edit = commands.add_parser("edit", help="apply a JSON plan and write a new cast")
    for positional in ("infile", "outfile"):
        edit.add_argument(positional)
    edit.add_argument("--plan", help="JSON plan file (see SKILL.md for schema)")
    edit.add_argument(
        "--idle-cap", type=float, default=None,
        help="shortcut: cap every gap at N seconds (overrides plan)")
    edit.set_defaults(func=cmd_edit)

    parsed = parser.parse_args()
    parsed.func(parsed)


 # Run the command line only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
diff --git a/SKILL.md b/SKILL.md
	#!/usr/bin/env python3
	"""Edit asciicast v3 recordings: report a compact timeline, then apply timing edits.

	asciicast v3 stores each event as a JSON array ``[interval, code, data]`` where
	``interval`` is seconds since the previous event (relative timing). Because the
	timeline is relative, trimming idle gaps and speeding up regions are just
	arithmetic on the ``interval`` field, with no downstream timestamps to repair.
	The header line and every event's ``data`` payload are passed through verbatim.

	This script is the engine for the cast-edit skill. It has three modes:

	  analyze   read the whole cast, print a small summary (duration, idle gaps,
	            detected typing runs with decoded snippets). This is the interface
	            the model reasons over, so the full cast never enters context.
	  show      dump the raw events in a time window, for inspecting one region.
	  edit      apply a JSON plan (idle cap, speed regions, cuts) and write a new cast.

	Timecodes everywhere refer to the ORIGINAL timeline (cumulative seconds in the
	source cast), so a region found by ``analyze`` can be fed straight back to ``edit``.
	"""
	import argparse
	import json
	import re
	import sys

	# --- ANSI / control stripping, for reconstructing what was typed -------------
	# visible() applies these to non-input payloads in the order OSC, CSI, ESC.
	# OSC: ESC ] followed by any bytes up to a BEL or ST (ESC \) terminator.
	# The terminator is an alternation; an escaped "\|" would match a literal pipe.
	_OSC = re.compile(r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)")   # OSC ... BEL/ST
	# CSI: ESC [ then any number of parameter bytes (0-?) and intermediate bytes
	# (space-/), then one final byte (@-~). Both classes need "*" so that
	# sequences such as ESC [ 1 ; 3 1 m and ESC [ m are matched.
	_CSI = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")             # CSI ... final byte
	# Remaining escapes: ESC, optional intermediate bytes, then one byte in 0-~.
	_ESC = re.compile(r"\x1b[ -/]*[0-~]")                     # other 2-char escapes


	def visible(data, code):
	    """Approximate the plain text that one event's payload put on screen.

	    Input events (``code == "i"``) are taken as-is; for every other code the
	    OSC, CSI and remaining ESC sequences are removed first. A backspace then
	    erases the previously kept character, and all other control characters
	    (CR, LF, TAB, DEL, ...) are dropped. Terminal redraws such as shell
	    autosuggestions cannot be replayed this way, so treat the result as a
	    hint rather than ground truth.
	    """
	    text = data
	    if code != "i":
	        for pattern in (_OSC, _CSI, _ESC):
	            text = pattern.sub("", text)
	    kept = []
	    for char in text:
	        if char == "\b":
	            # a backspace removes the last kept character, if there is one
	            if kept:
	                kept.pop()
	        elif char >= " " and char != "\x7f":
	            # printable: everything below space (incl. \r \n \t) and DEL is skipped
	            kept.append(char)
	    return "".join(kept)


	# --- loading -----------------------------------------------------------------
	def load(path):
	    """Parse an asciicast v3 file into its header and an ordered list of rows.

	    Returns ``(header, rows)``. ``rows`` keeps file order and holds
	    ``(kind, payload)`` pairs: ``'h'`` for the header (the parsed JSON value),
	    ``'c'`` for blank lines and ``#`` comments (the raw line), and ``'e'`` for
	    events (the parsed ``[interval, code, data]`` array).

	    Exits via ``sys.exit`` when the file is empty or the header is not a
	    version-3 header. Invalid JSON on any line raises ``json.JSONDecodeError``.
	    """
	    with open(path) as f:
	        # Split on "\n" only. str.splitlines() also breaks on U+0085, U+2028
	        # and U+2029, which JSON allows unescaped inside a string, so it would
	        # cut such an event line in two and make json.loads() fail.
	        lines = f.read().split("\n")
	    if lines and lines[-1] == "":
	        lines.pop()                        # the piece after the final newline
	    if not lines:
	        sys.exit("empty file")
	    header = json.loads(lines[0])
	    # A header that is not a JSON object has no version at all.
	    version = header.get("version") if isinstance(header, dict) else None
	    if version != 3:
	        sys.exit(f"not an asciicast v3 file (version={version!r}); "
	                 f"convert first: asciinema convert in out (writes v3 by default)")
	    rows = [("h", header)]
	    for line in lines[1:]:
	        if not line or line.startswith("#"):
	            rows.append(("c", line))
	        else:
	            rows.append(("e", json.loads(line)))
	    return header, rows


	def event_times(rows):
	    """Map each event row index to the cumulative time at which it fires.

	    Returns ``(times, total)``: ``times`` is keyed by the index of every
	    ``'e'`` row in *rows* and holds the running sum of intervals up to and
	    including that event; ``total`` is the final running sum (0.0 if there
	    are no events).
	    """
	    t, acc = {}, 0.0
	    for i, (kind, payload) in enumerate(rows):
	        if kind == "e":
	            acc += payload[0]
	            t[i] = acc
	    return t, acc


	# --- analyze -----------------------------------------------------------------
	def find_idle(rows, times, threshold):
	    """List every event that was preceded by a pause longer than *threshold*.

	    Returns one tuple per such event, in file order:
	    ``(row_index, pause_start, pause_length, preview)``. ``pause_start`` is
	    the event's timecode from *times* minus its own interval, and ``preview``
	    is the first 32 characters ``visible`` extracts from the event itself.
	    """
	    gaps = []
	    for i, (kind, payload) in enumerate(rows):
	        if kind != "e":
	            continue
	        interval = payload[0]
	        if interval > threshold:
	            begins = times[i] - interval
	            after = visible(payload[2], payload[1])[:32]
	            gaps.append((i, begins, interval, after))
	    return gaps


	def find_typing(rows, times, min_gap, max_gap, min_events):
	    """Find maximal runs of consecutive events spaced at keystroke cadence.

	    An event is "paced" when ``min_gap <= interval <= max_gap``. A run is a
	    stretch of consecutive paced events, seeded with the event just before
	    the first paced one. Runs shorter than *min_events* (seed included) are
	    dropped.

	    Cadence is the signal on purpose: output bursts have ~0 gaps, idle is one
	    big gap, typing is a sustained series of small gaps. Payload size is not
	    used as a filter because autosuggestions inflate it.

	    Returns a list of ``(start, end, duration, n_events, text)`` where the
	    times come from *times* and ``text`` is the whitespace-collapsed,
	    48-character-capped concatenation of ``visible`` over the run.
	    """
	    ev_idx = [i for i, (k, _) in enumerate(rows) if k == "e"]
	    runs, run = [], []
	    for pos, i in enumerate(ev_idx):
	        if min_gap <= rows[i][1][0] <= max_gap:
	            if not run and pos:
	                # Seed with the preceding event. The very first event has no
	                # predecessor; it must not be seeded with itself, or it would
	                # be counted (and its text shown) twice.
	                run.append(ev_idx[pos - 1])
	            run.append(i)
	            continue
	        if len(run) >= min_events:
	            runs.append(run)
	        run = []
	    if len(run) >= min_events:             # a run still open at end of cast
	        runs.append(run)
	    out = []
	    for run in runs:
	        start, end = times[run[0]], times[run[-1]]
	        text = "".join(visible(rows[i][1][2], rows[i][1][1]) for i in run)
	        text = re.sub(r"\s+", " ", text).strip()[:48]
	        out.append((start, end, end - start, len(run), text))
	    return out


	def cmd_analyze(args):
	    """Print the compact report for one cast: totals, idle gaps, typing runs.

	    Reads ``args.infile`` and uses ``args.idle_threshold``,
	    ``args.type_min_gap``, ``args.type_max_gap``, ``args.min_keystrokes`` and
	    ``args.limit``. Idle gaps are listed longest first, typing runs in file
	    order; both lists are cut to ``args.limit`` entries (``None`` = all).
	    """
	    header, rows = load(args.infile)
	    times, total = event_times(rows)
	    n_events = sum(1 for k, _ in rows if k == "e")
	    term = header.get("term", {})

	    print(f"cast: {args.infile}")
	    # plain "|" separators: "\|" is not a valid string escape
	    print(f"version: 3 | term: {term.get('cols')}x{term.get('rows')} | "
	          f"events: {n_events} | duration: {total:.3f}s")

	    gaps = find_idle(rows, times, args.idle_threshold)
	    # time saved if every listed gap were shortened to exactly the threshold
	    reclaim = sum(g[2] - args.idle_threshold for g in gaps)
	    print(f"\nIDLE GAPS (interval > {args.idle_threshold}s) -- candidates to trim/cap")
	    if gaps:
	        print(f"  {'begins(s)':>9}  {'gap(s)':>7}  next-output")
	        for _, begins, gap, after in sorted(gaps, key=lambda g: -g[2])[:args.limit]:
	            print(f"  {begins:9.3f}  {gap:7.3f}  {after!r}")
	        print(f"  -> capping at {args.idle_threshold}s reclaims {reclaim:.3f}s "
	              f"(new duration ~{total - reclaim:.3f}s)")
	    else:
	        print("  (none)")

	    runs = find_typing(rows, times, args.type_min_gap, args.type_max_gap,
	                       args.min_keystrokes)
	    print(f"\nTYPING RUNS (cadence {args.type_min_gap}-{args.type_max_gap}s, "
	          f">={args.min_keystrokes} events) -- candidates to speed up")
	    if runs:
	        print(f"  {'start(s)':>8}  {'end(s)':>7}  {'dur(s)':>6}  {'evts':>4}  text~")
	        for start, end, dur, count, text in runs[:args.limit]:
	            print(f"  {start:8.3f}  {end:7.3f}  {dur:6.3f}  {count:4d}  {text!r}")
	    else:
	        print("  (none)")
	    print("\nNext: build a plan (see SKILL.md) and run: castedit.py edit IN OUT --plan plan.json")


	# --- show --------------------------------------------------------------------
	def cmd_show(args):
	    """Print the raw events whose timecode lies in ``[args.start, args.end]``.

	    Each line shows the event's cumulative timecode, its code, and its data
	    as a JSON string. Both window bounds are inclusive.
	    """
	    _, rows = load(args.infile)
	    times, _ = event_times(rows)
	    for i, (kind, payload) in enumerate(rows):
	        if kind != "e":
	            continue
	        t = times[i]
	        if args.start <= t <= args.end:
	            print(f"{t:8.3f}  {payload[1]}  {json.dumps(payload[2])}")


	# --- edit --------------------------------------------------------------------
	def in_regions(t, regions):
	    """Return the first region whose inclusive ``start``..``end`` span holds *t*.

	    *regions* is searched in order; ``None`` is returned when nothing matches.
	    """
	    for r in regions:
	        if r["start"] <= t <= r["end"]:
	            return r
	    return None


	def _plan_regions(plan, key, need_factor=False):
	    """Return ``plan[key]`` (default ``[]``), exiting if an entry is unusable.

	    Every region must be an object with ``start`` and ``end``. With
	    *need_factor* it must also carry a ``factor`` greater than zero, because
	    event intervals are divided by it.
	    """
	    regions = plan.get(key, [])
	    if not isinstance(regions, list):
	        sys.exit(f"plan: {key!r} must be a list of regions")
	    for region in regions:
	        if not isinstance(region, dict) or "start" not in region or "end" not in region:
	            sys.exit(f"plan: every {key!r} entry needs 'start' and 'end': {region!r}")
	        if need_factor:
	            factor = region.get("factor")
	            if (isinstance(factor, bool) or not isinstance(factor, (int, float))
	                    or not factor > 0):
	                sys.exit(f"plan: 'factor' in {key!r} must be a number > 0: {region!r}")
	    return regions


	def cmd_edit(args):
	    """Apply a timing plan to ``args.infile`` and write ``args.outfile``.

	    The plan comes from the JSON file ``args.plan`` (optional) and may hold
	    ``idle_cap``, ``speed_regions``, ``cuts`` and ``resume_gap`` (default
	    0.1). ``args.idle_cap``, when given, overrides the plan's ``idle_cap``.
	    All region timecodes refer to the original timeline.

	    Per event, in order: events inside a cut are dropped; the interval is
	    capped at ``idle_cap``; divided by the ``factor`` of the first matching
	    speed region; and, for the first event after a cut, capped at
	    ``resume_gap``. Intervals are written rounded to milliseconds.

	    Exits via ``sys.exit`` on a malformed plan, before anything is written.
	    """
	    header, rows = load(args.infile)
	    times, before = event_times(rows)

	    plan = {}
	    if args.plan:
	        with open(args.plan) as f:
	            plan = json.load(f)
	        if not isinstance(plan, dict):
	            sys.exit("plan: top level must be a JSON object")
	    if args.idle_cap is not None:          # CLI override / shortcut
	        plan["idle_cap"] = args.idle_cap
	    idle_cap = plan.get("idle_cap")
	    # Validate up front so a bad region cannot fail halfway through the cast
	    # or divide by a zero / negative factor.
	    speed_regions = _plan_regions(plan, "speed_regions", need_factor=True)
	    cuts = _plan_regions(plan, "cuts")
	    resume_gap = plan.get("resume_gap", 0.1)

	    out = [json.dumps(header, separators=(",", ":"), ensure_ascii=False)]
	    carry = 0.0          # error-diffusion accumulator keeps total drift sub-ms
	    just_cut = False
	    for i, (kind, payload) in enumerate(rows):
	        if kind == "h":
	            continue
	        if kind == "c":
	            out.append(payload)
	            continue
	        interval, code, data = payload
	        t = times[i]

	        if in_regions(t, cuts):            # drop this event entirely
	            just_cut = True
	            continue

	        if idle_cap is not None:
	            interval = min(interval, idle_cap)
	        region = in_regions(t, speed_regions)
	        if region:
	            interval = interval / region["factor"]
	        if just_cut:                       # close the hole left by a cut
	            interval = min(interval, resume_gap)
	            just_cut = False

	        # Round the running remainder rather than each interval on its own, so
	        # rounding errors do not accumulate over the cast.
	        carry += interval
	        rounded = round(carry, 3)
	        if rounded <= 0.0:                 # never emit a negative interval or
	            rounded = 0.0                  # "-0.0", which the v3 parser rejects
	        carry -= rounded
	        # ensure_ascii=False keeps non-ASCII glyphs literal (matching asciinema's
	        # own output and keeping files compact); control chars are still escaped.
	        out.append(json.dumps([rounded, code, data], ensure_ascii=False))

	    with open(args.outfile, "w") as f:
	        f.write("\n".join(out) + "\n")

	    # recompute new duration for the summary
	    _, after = event_times(load(args.outfile)[1])
	    # A cast with no events (or only zero intervals) has no meaningful ratio.
	    share = f"{after / before * 100:.0f}%" if before > 0 else "n/a"
	    print(f"wrote {args.outfile}")
	    print(f"duration: {before:.3f}s -> {after:.3f}s "
	          f"({before - after:+.3f}s, {share} of original)")
	    print("validate: asciinema convert "
	          f"{args.outfile} /tmp/_v.txt --overwrite   (errors here = invalid cast)")


	def main():
	    """Build the ``analyze`` / ``show`` / ``edit`` command line and dispatch.

	    Each subcommand stores its handler as ``func``; after parsing, that
	    handler is called with the parsed arguments.
	    """
	    ap = argparse.ArgumentParser(description=__doc__,
	                                 formatter_class=argparse.RawDescriptionHelpFormatter)
	    sub = ap.add_subparsers(dest="cmd", required=True)

	    a = sub.add_parser("analyze", help="print a compact timeline summary")
	    a.add_argument("infile")
	    a.add_argument("--idle-threshold", type=float, default=0.5)
	    a.add_argument("--type-min-gap", type=float, default=0.04)
	    a.add_argument("--type-max-gap", type=float, default=0.5)
	    a.add_argument("--min-keystrokes", type=int, default=4)
	    a.add_argument("--limit", type=int, default=None,
	                   help="cap how many gaps/runs to list (default: show all). "
	                        "Set this only if a very long cast makes the output unwieldy.")
	    a.set_defaults(func=cmd_analyze)

	    s = sub.add_parser("show", help="dump raw events in a time window")
	    s.add_argument("infile")
	    s.add_argument("--start", type=float, default=0.0)
	    s.add_argument("--end", type=float, default=float("inf"))
	    s.set_defaults(func=cmd_show)

	    e = sub.add_parser("edit", help="apply a JSON plan and write a new cast")
	    e.add_argument("infile")
	    e.add_argument("outfile")
	    e.add_argument("--plan", help="JSON plan file (see SKILL.md for schema)")
	    e.add_argument("--idle-cap", type=float, default=None,
	                   help="shortcut: cap every gap at N seconds (overrides plan)")
	    e.set_defaults(func=cmd_edit)

	    args = ap.parse_args()
	    args.func(args)


	# Run the command line only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()
No results found