Skip to content

Instantly share code, notes, and snippets.

@krasserm
Created May 28, 2026 05:07
Show Gist options
  • Select an option

  • Save krasserm/d745bd99013da875258dd495bf967a2d to your computer and use it in GitHub Desktop.

Select an option

Save krasserm/d745bd99013da875258dd495bf967a2d to your computer and use it in GitHub Desktop.
cast-edit: agentic editing of asciinema .cast recordings (Claude Code skill + Python engine)
#!/usr/bin/env python3
"""Edit asciicast v3 recordings: report a compact timeline, then apply timing edits.
asciicast v3 stores each event as a JSON array ``[interval, code, data]`` where
``interval`` is *seconds since the previous event* (relative timing). Because the
timeline is relative, trimming idle gaps and speeding up regions are just
arithmetic on the ``interval`` field, with no downstream timestamps to repair.
The header line and every event's ``data`` payload are passed through verbatim.
This script is the engine for the cast-edit skill. It has three modes:
analyze read the whole cast, print a small summary (duration, idle gaps,
detected typing runs with decoded snippets). This is the interface
the model reasons over, so the full cast never enters context.
show dump the raw events in a time window, for inspecting one region.
edit apply a JSON plan (idle cap, speed regions, cuts) and write a new cast.
Timecodes everywhere refer to the ORIGINAL timeline (cumulative seconds in the
source cast), so a region found by ``analyze`` can be fed straight back to ``edit``.
"""
import argparse
import json
import re
import sys
# --- ANSI / control stripping, for reconstructing what was typed -------------
# Compiled once at import time. visible() removes OSC first, then CSI, then
# any remaining escape, so the broad _ESC pattern never sees the start of a
# longer OSC/CSI sequence.
# OSC: ESC ] <payload>, terminated by BEL (\x07) or ST (ESC \).
_OSC = re.compile(r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)")
# CSI: ESC [ <parameter bytes 0x30-0x3F> <intermediate bytes 0x20-0x2F>
# <one final byte 0x40-0x7E>.
_CSI = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
# Other escapes: ESC, optional intermediate bytes 0x20-0x2F, then one final
# byte in 0x30-0x7E.
_ESC = re.compile(r"\x1b[ -/]*[0-~]")
def visible(data, code):
    """Best-effort plain text a chunk drew on screen (approximate).

    Output chunks have OSC, CSI and other escape sequences stripped first;
    input ('i') chunks are taken as literal keystrokes and are not stripped.
    Erase characters then remove the previously kept character, and all
    remaining control characters (below 0x20, plus DEL) are dropped.

    For input events DEL (0x7f) is treated as an erase as well as ``\\b``:
    most terminals send DEL for the Backspace key, so ignoring it would leave
    corrected typos in the reconstructed snippet. For output events only
    ``\\b`` erases, as before.

    Terminal redraws (e.g. shell autosuggestions) make exact reconstruction
    impossible, so treat the result as a hint, not ground truth.

    Args:
        data: the event's payload string.
        code: the event code; ``"i"`` marks an input (keystroke) event.

    Returns:
        The approximate visible text as a ``str`` (possibly empty).
    """
    if code == "i":
        text = data
        erasers = "\b\x7f"
    else:
        text = _ESC.sub("", _CSI.sub("", _OSC.sub("", data)))
        erasers = "\b"

    shown = []
    for ch in text:
        if ch in erasers:
            if shown:  # erasing at the start of the chunk is a no-op
                shown.pop()
        elif ch >= " " and ch != "\x7f":
            # printable; \r, \n, \t and other C0 controls fall through and
            # are dropped because they sort below the space character
            shown.append(ch)
    return "".join(shown)
# --- loading -----------------------------------------------------------------
def load(path):
    """Parse an asciicast v3 file into its header and an ordered list of rows.

    The file is read as UTF-8 and split on real line endings only. (The
    previous ``str.splitlines()`` also broke lines at U+2028, U+2029, U+0085
    and a few other characters that may legally appear unescaped inside a JSON
    string, which corrupted event lines containing them.)

    Args:
        path: path of the ``.cast`` file to read.

    Returns:
        ``(header, rows)`` where ``header`` is the decoded first line and
        ``rows`` preserves file order. Each row is ``(kind, payload)``:
        ``'h'`` header (the header dict), ``'c'`` comment or blank line (the
        raw string), ``'e'`` event (the decoded ``[interval, code, data]``).

    Raises:
        SystemExit: if the file is empty or its header is not a version 3
            asciicast header.
        json.JSONDecodeError: if the header or an event line is not valid JSON.
    """
    with open(path, encoding="utf-8") as f:
        # Text-mode iteration splits on \n, \r and \r\n only, never on
        # Unicode line separators embedded in a payload.
        lines = [raw.rstrip("\n") for raw in f]
    if not lines:
        sys.exit("empty file")

    header = json.loads(lines[0])
    version = header.get("version") if isinstance(header, dict) else None
    if version != 3:
        sys.exit(f"not an asciicast v3 file (version={version!r}); "
                 f"convert first: asciinema convert in out (writes v3 by default)")

    rows = [("h", header)]
    for line in lines[1:]:
        if not line or line.startswith("#"):
            rows.append(("c", line))
        else:
            rows.append(("e", json.loads(line)))
    return header, rows
def event_times(rows):
    """Map each event row to the time at which it fires on the original timeline.

    Walks ``rows`` in order, summing the relative ``interval`` (first element
    of every ``'e'`` payload). Header and comment rows contribute nothing and
    get no entry.

    Returns:
        ``(fired, total)``: ``fired`` maps the row index of each event to its
        cumulative time in seconds; ``total`` is the sum of all intervals
        (``0.0`` when there are no events).
    """
    fired = {}
    elapsed = 0.0
    for idx, row in enumerate(rows):
        if row[0] != "e":
            continue
        elapsed += row[1][0]
        fired[idx] = elapsed
    return fired, elapsed
# --- analyze -----------------------------------------------------------------
def find_idle(rows, times, threshold):
    """List the events that were preceded by a pause longer than ``threshold``.

    Args:
        rows: rows as returned by ``load``.
        times: row index -> cumulative firing time, as from ``event_times``.
        threshold: minimum pause in seconds; only strictly longer ones count.

    Returns:
        One tuple per qualifying event, in file order:
        ``(row_index, gap_start, gap_length, preview)``. ``gap_start`` is the
        event's firing time minus its interval, and ``preview`` is the first
        32 characters of the text the event drew (via ``visible``).
    """
    return [
        (idx, times[idx] - row[1][0], row[1][0], visible(row[1][2], row[1][1])[:32])
        for idx, row in enumerate(rows)
        if row[0] == "e" and row[1][0] > threshold
    ]
def find_typing(rows, times, min_gap, max_gap, min_events):
    """Maximal runs of consecutive events spaced at human keystroke cadence.

    Cadence is the strong signal (output bursts have ~0 gaps; idle has one big
    gap; typing is a sustained run of small human-paced gaps). Payload size is
    deliberately not used as a hard filter because autosuggestions inflate it.

    An event is "paced" when ``min_gap <= interval <= max_gap``. A run consists
    of the event just before the first paced one (the anchor its interval is
    measured from) followed by every consecutive paced event. When the very
    first event of the cast is paced there is no anchor, so the run starts at
    that event itself; it is listed once, not twice.

    Args:
        rows: rows as returned by ``load``.
        times: row index -> cumulative firing time, as from ``event_times``.
        min_gap: smallest interval (seconds) still considered typing.
        max_gap: largest interval (seconds) still considered typing.
        min_events: minimum number of events (anchor included) for a run.

    Returns:
        One tuple per run, in file order:
        ``(start, end, duration, event_count, text)`` where ``start``/``end``
        are the firing times of the first and last event of the run and
        ``text`` is an approximate, whitespace-collapsed snippet of at most 48
        characters.
    """
    runs, run = [], []
    ev_idx = [i for i, (kind, _) in enumerate(rows) if kind == "e"]
    for pos, i in enumerate(ev_idx):
        interval = rows[i][1][0]
        if min_gap <= interval <= max_gap:
            if not run and pos:
                # Open the run at the preceding event, which is where this
                # interval starts. With pos == 0 there is no such event.
                run.append(ev_idx[pos - 1])
            run.append(i)
            continue
        if len(run) >= min_events:
            runs.append(run)
        run = []
    if len(run) >= min_events:  # a run that lasts until the final event
        runs.append(run)

    out = []
    for run in runs:
        start, end = times[run[0]], times[run[-1]]
        text = "".join(visible(rows[i][1][2], rows[i][1][1]) for i in run)
        text = re.sub(r"\s+", " ", text).strip()[:48]
        out.append((start, end, end - start, len(run), text))
    return out
def cmd_analyze(args):
    """Print the compact timeline summary for ``args.infile``.

    The report has three parts: a header line (terminal size, event count,
    total duration), the idle gaps longer than ``args.idle_threshold`` sorted
    longest first, and the detected typing runs in file order. Both tables are
    truncated to ``args.limit`` rows when a limit is given.
    """
    header, rows = load(args.infile)
    times, total = event_times(rows)
    term = header.get("term", {})
    n_events = len([row for row in rows if row[0] == "e"])

    print(f"cast: {args.infile}")
    print(f"version: 3 | term: {term.get('cols')}x{term.get('rows')} | "
          f"events: {n_events} | duration: {total:.3f}s")

    # --- idle gaps ---
    gaps = find_idle(rows, times, args.idle_threshold)
    print(f"\nIDLE GAPS (interval > {args.idle_threshold}s) -- candidates to trim/cap")
    if not gaps:
        print(" (none)")
    else:
        # time saved if every listed gap were capped at the threshold
        reclaim = sum(length - args.idle_threshold for _, _, length, _ in gaps)
        longest_first = sorted(gaps, key=lambda g: g[2], reverse=True)
        print(f" {'begins(s)':>9} {'gap(s)':>7} next-output")
        for _, begins, gap, after in longest_first[:args.limit]:
            print(f" {begins:9.3f} {gap:7.3f} {after!r}")
        print(f" -> capping at {args.idle_threshold}s reclaims {reclaim:.3f}s "
              f"(new duration ~{total - reclaim:.3f}s)")

    # --- typing runs ---
    runs = find_typing(rows, times, args.type_min_gap, args.type_max_gap,
                       args.min_keystrokes)
    print(f"\nTYPING RUNS (cadence {args.type_min_gap}-{args.type_max_gap}s, "
          f">={args.min_keystrokes} events) -- candidates to speed up")
    if not runs:
        print(" (none)")
    else:
        print(f" {'start(s)':>8} {'end(s)':>7} {'dur(s)':>6} {'evts':>4} text~")
        for start, end, dur, count, text in runs[:args.limit]:
            print(f" {start:8.3f} {end:7.3f} {dur:6.3f} {count:4d} {text!r}")

    print("\nNext: build a plan (see SKILL.md) and run: castedit.py edit IN OUT --plan plan.json")
# --- show --------------------------------------------------------------------
def cmd_show(args):
    """Print every event whose firing time lies in ``[args.start, args.end]``.

    One line per event: the cumulative original-timeline time, the event code
    and the JSON-encoded payload, so control characters stay visible.
    """
    _, rows = load(args.infile)
    fired, _ = event_times(rows)
    # `fired` only holds event rows and keeps file order.
    for idx, t in fired.items():
        if not (args.start <= t <= args.end):
            continue
        event = rows[idx][1]
        code, data = event[1], event[2]
        print(f"{t:8.3f} {code} {json.dumps(data)}")
# --- edit --------------------------------------------------------------------
def in_regions(t, regions):
    """Return the first region whose closed interval contains ``t``.

    Args:
        t: a time in seconds.
        regions: iterable of dicts with ``"start"`` and ``"end"`` keys.

    Returns:
        The first matching region dict (both bounds inclusive), or ``None``
        when no region contains ``t``.
    """
    return next(
        (span for span in regions if span["start"] <= t <= span["end"]),
        None,
    )
def cmd_edit(args):
    """Apply a timing plan to ``args.infile`` and write ``args.outfile``.

    The plan comes from the JSON file ``args.plan`` (optional);
    ``args.idle_cap``, when given, overrides the plan's ``idle_cap``. All
    times in the plan refer to the original timeline.

    Per event, in order:
      1. events firing inside a ``cuts`` region are dropped;
      2. the interval is clamped to ``idle_cap``;
      3. the interval is divided by the ``factor`` of the first matching
         ``speed_regions`` entry;
      4. the first event kept after a cut has its interval clamped to
         ``resume_gap`` (default 0.1).
    Intervals are written with millisecond precision; the rounding error is
    carried into the next event so the total does not drift.

    Comment and blank lines are copied unchanged; the header is re-serialised
    compactly. Both files are handled as UTF-8 because payloads are written
    with non-ASCII characters left literal.

    Raises:
        SystemExit: if ``idle_cap`` or ``resume_gap`` is negative, or a speed
            region has a missing, non-numeric or non-positive ``factor``
            (which would otherwise divide by zero or yield negative
            intervals).
    """
    header, rows = load(args.infile)
    times, before = event_times(rows)

    plan = {}
    if args.plan:
        with open(args.plan, encoding="utf-8") as f:
            plan = json.load(f)
    if args.idle_cap is not None:  # CLI override / shortcut
        plan["idle_cap"] = args.idle_cap

    idle_cap = plan.get("idle_cap")
    speed_regions = plan.get("speed_regions", [])
    cuts = plan.get("cuts", [])
    resume_gap = plan.get("resume_gap", 0.1)

    # Reject plans that cannot produce a valid cast before touching the output.
    if idle_cap is not None and idle_cap < 0:
        sys.exit(f"idle_cap must be >= 0 (got {idle_cap!r})")
    if resume_gap < 0:
        sys.exit(f"resume_gap must be >= 0 (got {resume_gap!r})")
    for region in speed_regions:
        factor = region.get("factor")
        if not isinstance(factor, (int, float)) or factor <= 0:
            sys.exit(f"speed_regions factor must be a number > 0 (got {factor!r})")

    out = [json.dumps(header, separators=(",", ":"), ensure_ascii=False)]
    carry = 0.0  # error-diffusion accumulator keeps total drift sub-ms
    after = 0.0  # duration of the edited cast, summed as intervals are emitted
    just_cut = False
    for i, (kind, payload) in enumerate(rows):
        if kind == "h":
            continue
        if kind == "c":
            out.append(payload)
            continue

        interval, code, data = payload
        t = times[i]
        if in_regions(t, cuts):  # drop this event entirely
            just_cut = True
            continue
        if idle_cap is not None:
            interval = min(interval, idle_cap)
        region = in_regions(t, speed_regions)
        if region:
            interval = interval / region["factor"]
        if just_cut:  # close the hole left by a cut
            interval = min(interval, resume_gap)
            just_cut = False

        carry += interval
        rounded = round(carry, 3)
        if rounded <= 0.0:  # never emit a negative interval or
            rounded = 0.0   # "-0.0", which the v3 parser rejects
        carry -= rounded
        after += rounded
        # ensure_ascii=False keeps non-ASCII glyphs literal (matching asciinema's
        # own output and keeping files compact); control chars are still escaped.
        out.append(json.dumps([rounded, code, data], ensure_ascii=False))

    with open(args.outfile, "w", encoding="utf-8") as f:
        f.write("\n".join(out) + "\n")

    print(f"wrote {args.outfile}")
    # A cast with no events (or only zero intervals) has no meaningful ratio.
    ratio = f"{after / before * 100:.0f}% of original" if before > 0 else "n/a"
    print(f"duration: {before:.3f}s -> {after:.3f}s "
          f"({before - after:+.3f}s, {ratio})")
    print("validate: asciinema convert "
          f"{args.outfile} /tmp/_v.txt --overwrite (errors here = invalid cast)")
def main():
    """Parse the command line and dispatch to the chosen sub-command.

    Three sub-commands are registered: ``analyze``, ``show`` and ``edit``.
    Each stores its handler in ``func``, which is then called with the parsed
    arguments.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    # analyze
    analyze = commands.add_parser("analyze", help="print a compact timeline summary")
    analyze.add_argument("infile")
    for flag, default in (("--idle-threshold", 0.5),
                          ("--type-min-gap", 0.04),
                          ("--type-max-gap", 0.5)):
        analyze.add_argument(flag, type=float, default=default)
    analyze.add_argument("--min-keystrokes", type=int, default=4)
    analyze.add_argument(
        "--limit", type=int, default=None,
        help="cap how many gaps/runs to list (default: show all). "
             "Set this only if a very long cast makes the output unwieldy.",
    )
    analyze.set_defaults(func=cmd_analyze)

    # show
    show = commands.add_parser("show", help="dump raw events in a time window")
    show.add_argument("infile")
    show.add_argument("--start", type=float, default=0.0)
    show.add_argument("--end", type=float, default=float("inf"))
    show.set_defaults(func=cmd_show)

    # edit
    edit = commands.add_parser("edit", help="apply a JSON plan and write a new cast")
    edit.add_argument("infile")
    edit.add_argument("outfile")
    edit.add_argument("--plan", help="JSON plan file (see SKILL.md for schema)")
    edit.add_argument("--idle-cap", type=float, default=None,
                      help="shortcut: cap every gap at N seconds (overrides plan)")
    edit.set_defaults(func=cmd_edit)

    parsed = parser.parse_args()
    parsed.func(parsed)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
name cast-edit
description Edit and render asciinema / asciicast terminal recordings (.cast files): trim idle/dead time, speed up regions or detected typing runs, and render a .cast to a GIF with the agg flags that avoid Claude Code glyph artifacts (tofu auto-mode arrows, a stray block under the first typed char). Use this whenever the user wants to tighten, shorten, trim pauses from, speed up, convert, or render a terminal recording or the .cast behind a terminal GIF (agg/asciinema), even if they don't say "asciicast". Phrases like "trim the pauses", "this demo GIF is too slow", "convert this cast to a gif", or "the arrows in my gif look broken" should trigger it.

cast-edit

Post-process asciicast v3 recordings: remove idle windows and speed up chosen regions. The engine is scripts/castedit.py. Modern asciinema (3.x) records v3 by default; if you have a v2 cast, convert it first with asciinema convert old.cast new.cast (writes v3).

Why this is easy, and the one rule that keeps it cheap

In v3 each event line is [interval, code, data] where interval is seconds since the previous event. Editing time is pure arithmetic on interval — there are no absolute timestamps to recompute. So trimming and speeding are robust and lossless to the rendered output.

The cast never goes into your context. A real recording can be megabytes of ANSI output. castedit.py analyze does the parsing and hands you a few-hundred-token summary; you reason over that. Only run show (a small slice) if you must inspect raw events. Never Read a whole .cast to plan an edit — that defeats the design.

Workflow

  1. Analyze. Get the timeline, idle gaps, and detected typing runs:

    uv run python "${CLAUDE_SKILL_DIR}/scripts/castedit.py" analyze RECORDING.cast
    

    Tune thresholds if the defaults miss things: --idle-threshold 0.7, --type-max-gap 0.4, --min-keystrokes 3.

  2. Decide regions, then verify. Idle gaps map directly to an idle_cap. For "speed up the typing" (or similar criteria), the typing runs in the analyze output are candidates — each comes with a text~ snippet reconstructed from the events. Read those snippets to confirm a run is really the kind of region the user means before speeding it up. If a snippet is ambiguous, inspect it precisely:

    uv run python "${CLAUDE_SKILL_DIR}/scripts/castedit.py" show RECORDING.cast --start 2.4 --end 3.9
    
  3. Propose the plan to the user in plain terms (which gaps capped, which regions sped up and by how much, before/after duration estimate) and get approval. Editing a recording is the kind of change worth confirming before applying.

  4. Write the plan and apply it:

    uv run python "${CLAUDE_SKILL_DIR}/scripts/castedit.py" edit RECORDING.cast RECORDING.edited.cast --plan plan.json
    

    For the trivial "just cap idle" case you can skip the plan file: ... edit IN OUT --idle-cap 0.5.

  5. Validate, then report. Always confirm the output is a parseable cast — a bad interval can silently corrupt it:

    asciinema convert RECORDING.edited.cast /tmp/_v.txt --overwrite
    

    The edit command already prints the before/after duration. If the user is making a GIF, render with the renderer and font flags that avoid agg's two Claude-Code artifacts — a stray block bleeding under the first typed character, and tofu auto-mode arrows:

    agg --renderer resvg \
      --text-font-family "JetBrains Mono,Fira Code,SF Mono,Menlo,Consolas,DejaVu Sans Mono,Liberation Mono,STIX Two Math" \
      RECORDING.edited.cast out.gif
    

    Why: agg's default swash renderer composites incrementally and leaves the reverse-video placeholder cursor under the first letter; resvg clears cells correctly. --text-font-family extends (not replaces) the fallback list, so the arrows (U+23F5) resolve via STIX Two Math while keeping agg's normal glyph fallbacks. On a machine without STIX Two Math, substitute any font carrying U+23F5 (fc-list ":charset=23f5").

Plan schema

All fields optional; times are in original-timeline seconds (the same numbers analyze and show print), so a region you saw in analyze pastes straight in.

{
  "idle_cap": 0.5,
  "speed_regions": [
    {"start": 2.40, "end": 3.85, "factor": 2.0}
  ],
  "cuts": [
    {"start": 40.0, "end": 55.0}
  ],
  "resume_gap": 0.1
}
  • idle_cap (s): clamp every gap longer than this to this value. The simplest "remove dead air" knob; one global threshold handles most cases.
  • speed_regions: each event whose firing time is in [start, end] has its interval divided by factor. factor > 1 is faster, < 1 slower. This is how both "speed up 2.4s–3.85s" (explicit bounds) and "speed up the typing" (bounds taken from a detected run) are expressed. Keep regions non-overlapping; the first match wins.
  • cuts: drop every event in [start, end] and the time it occupied. Use only for self-contained boring/idle stretches — cuts remove the output those events drew, so cutting mid-render can leave the screen in a wrong state. The first event kept after a cut has its interval clamped to at most resume_gap seconds (default 0.1), which closes the gap left behind.

Application order per event: cut (drop) → idle_cap → speed_regions → resume_gap (applied only to the first event kept after a cut).

Defaults and gotchas

  • Typing detection uses cadence, not payload size: a run of consecutive events spaced --type-min-gap..--type-max-gap apart (default 0.04–0.5s), at least --min-keystrokes long. Output bursts (~0s gaps) and idle (one big gap) are excluded by construction. Snippets are approximate because terminals redraw — verify, don't trust blindly.
  • The engine rounds intervals to millisecond precision with error diffusion (so total duration stays accurate) and clamps at 0. This matters: asciinema's v3 parser rejects a -0.0 interval with the misleading error invalid digit found in string, and naive rounding can produce one. The script already guards against this; preserve that clamp if you modify it.
  • analyze lists every idle gap and typing run by default, so you never plan an edit blind to part of the recording. On a very long cast that output can get large; pass --limit N to cap it to the N biggest, but prefer scoping with a future segment/zoom step over hiding rows.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment