IMPORTANT: Do NOT write multi-line Python as inline python3 -c — the terminal garbles it. Always write to a file first, then run.
Run these in order. Set CONVO first:
CONVO=~/.openhands/conversations/CONVERSATION_ID
Event count and time span:
echo "Events: $(ls $CONVO/events/ | wc -l)" && jq -r '.timestamp[:19]' $CONVO/events/event-00000-*.json && jq -r '.timestamp[:19]' $CONVO/events/$(ls $CONVO/events/ | tail -1)
Model used:
jq -r '.agent.llm.model' $CONVO/base_state.json
User's request (first message):
jq -r '.llm_message.content | if type == "array" then map(select(.type == "text") | .text) | join(" ") else . end' $CONVO/events/event-00001-*.json | head -c 500
Did it finish? (look for FinishAction):
# `grep .` makes the pipeline fail when no file matched; `head -1` alone always exits 0, which printed YES unconditionally.
grep -l FinishAction "$CONVO"/events/*.json | head -1 | grep . && echo "YES" || echo "NO — never finished"
Finish message (if it finished):
grep -l FinishAction $CONVO/events/*.json | head -1 | xargs jq -r '.action.message // empty' 2>/dev/null | head -c 500
Errors — non-zero exit codes:
# A missing/null exit_code is treated as 0 (`// 0`) so only real non-zero exits are listed.
for f in "$CONVO"/events/*.json; do jq -r 'select(.observation.kind == "TerminalObservation" and (.observation.metadata.exit_code // 0) != 0) | "\(._idx // input_filename) exit=\(.observation.metadata.exit_code)"' "$f" 2>/dev/null; done
ErrorObservation events:
# Take the basename before splitting on "-" so hyphens in directory names (e.g. the conversation id) cannot shift the event number.
for f in "$CONVO"/events/*.json; do jq -r 'select(.observation.kind == "ErrorObservation") | "\(input_filename | split("/") | last | split("-")[1]): \(.observation.text[:120])"' "$f" 2>/dev/null; done
Full timeline (action kinds only, compact):
# Glob directly (already sorted by name) instead of parsing `ls` output, which word-splits paths.
for f in "$CONVO"/events/event-*.json; do jq -r '[.timestamp[:19], .source, (.action.kind // .observation.kind // .kind // "?")] | join(" ")' "$f" 2>/dev/null; done
If jq triage shows something interesting, write this script to /tmp/oh_analyze.py and run it. It handles loop detection, time gaps, and a full narrative timeline.
cat > /tmp/oh_analyze.py << 'PYEOF'
import json, os, glob, sys
from datetime import datetime
# Conversation directory comes from argv[1]; falls back to the current directory.
convo_dir = sys.argv[1] if len(sys.argv) > 1 else "."
events_dir = os.path.join(convo_dir, "events")
# Sorted by file name; assumes the numeric part of event-NNNNN-*.json is
# zero-padded so that name order matches event order.
files = sorted(glob.glob(os.path.join(events_dir, "event-*.json")))
if not files:
    print("No events found in " + events_dir)
    sys.exit(1)
# Load every event file into memory, in file-name order.
events = []
for f in files:
    # JSON is read as UTF-8 explicitly so the locale default cannot break decoding.
    with open(f, encoding="utf-8") as fh:
        events.append(json.load(fh))
def akind(e):
    """Return the "kind" of the event's action, or "" when there is none.

    Args:
        e: Event dict loaded from an event file.

    Returns:
        The value of e["action"]["kind"], or "" if "action" is missing,
        is not a dict, or has no "kind" key.
    """
    a = e.get("action", {})
    return a.get("kind", "") if isinstance(a, dict) else ""
def okind(e):
    """Return the "kind" of the event's observation, or "" when there is none.

    Args:
        e: Event dict loaded from an event file.

    Returns:
        The value of e["observation"]["kind"], or "" if "observation" is
        missing, is not a dict, or has no "kind" key.
    """
    o = e.get("observation", {})
    return o.get("kind", "") if isinstance(o, dict) else ""
def describe(e, i):
    """Render one event as a single compact timeline line.

    Args:
        e: Event dict loaded from an event file.
        i: Position of the event. Unused; kept so existing calls keep working.

    Returns:
        A string "<timestamp[:19]> <source padded to 10> | <summary>". The
        summary is chosen from the event kind first, then the action kind,
        then the observation kind, falling back to the raw event kind.
    """
    src = e.get("source", "?")
    ts = e.get("timestamp", "?")[:19]
    raw_action = e.get("action")
    action = raw_action if isinstance(raw_action, dict) else {}
    raw_obs = e.get("observation")
    obs = raw_obs if isinstance(raw_obs, dict) else {}
    ak, ok = action.get("kind", ""), obs.get("kind", "")
    kind = e.get("kind", "")
    prefix = ts + " " + src.ljust(10) + " | "

    def clip(value, limit):
        # Truncate first, then flatten newlines so each event stays on one line.
        return str(value)[:limit].replace("\n", " ")

    if kind == "SystemPromptEvent":
        return prefix + "SYSTEM PROMPT"
    if kind == "MessageEvent":
        lm = e.get("llm_message", {}) or {}
        t = lm.get("content", "")
        if isinstance(t, list):
            # Content may be a list of parts; join the text of the dict parts.
            t = " ".join(str(x.get("text") or "") for x in t if isinstance(x, dict))
        return prefix + "MSG: " + clip(t, 140)
    if ak == "TerminalAction":
        return prefix + "CMD: " + clip(action.get("command", ""), 110)
    if ak == "FileEditorAction":
        p = str(action.get("path", ""))
        if len(p) > 55:
            # Keep the tail of long paths; the file name is the useful part.
            p = "..." + p[-52:]
        return prefix + "EDIT(" + str(action.get("command", "")) + "): " + p
    if ak == "MCPToolAction":
        return (
            prefix
            + "MCP: "
            + str(action.get("server_name", ""))
            + "/"
            + str(action.get("tool_name", ""))
        )
    if ak == "ThinkAction":
        return prefix + "THINK: " + clip(action.get("thought", ""), 100)
    if ak == "FinishAction":
        return prefix + "FINISH: " + clip(action.get("message", ""), 100)
    if ak == "DelegateAction":
        return prefix + "DELEGATE " + str(action.get("command", ""))
    if ak == "TaskTrackerAction":
        return prefix + "TASK_TRACKER"
    if ak:
        return prefix + "ACTION: " + ak
    if ok == "TerminalObservation":
        md = obs.get("metadata", {}) or {}
        return prefix + "exit=" + str(md.get("exit_code", "?"))
    if ok == "ErrorObservation":
        return prefix + "ERROR: " + clip(obs.get("text", ""), 90)
    if ok == "FinishObservation":
        return prefix + "FINISH OBS"
    if ok:
        return prefix + ok
    return prefix + kind
# --- Summary ---
# The model name is read from base_state.json when that file exists.
bs_path = os.path.join(convo_dir, "base_state.json")
model = "?"
if os.path.exists(bs_path):
    with open(bs_path, encoding="utf-8") as bs_file:
        base_state = json.load(bs_file)
    # `or {}` tolerates keys that are present but null.
    model = ((base_state.get("agent") or {}).get("llm") or {}).get("model", "?")
# Duration is first-to-last event; an unparsable timestamp must not abort
# the whole report (the gap scan tolerates bad timestamps too).
try:
    t0 = datetime.fromisoformat(events[0].get("timestamp", "2000-01-01"))
    t1 = datetime.fromisoformat(events[-1].get("timestamp", "2000-01-01"))
    dur = (t1 - t0).total_seconds()
except (TypeError, ValueError):
    dur = None
finished = any(akind(e) == "FinishAction" for e in events)
# "LLM turns" is counted as agent-sourced events that carry an action.
turns = sum(1 for e in events if e.get("source") == "agent" and akind(e))
print("=" * 65)
print("Model: " + str(model))
print("Events: " + str(len(events)) + " | LLM turns: " + str(turns))
print("Time: " + events[0].get("timestamp", "")[:19] + " -> " + events[-1].get("timestamp", "")[:19])
if dur is None:
    print("Duration: ?")
else:
    print("Duration: " + str(int(dur)) + "s (" + str(round(dur / 60, 1)) + " min)")
print("Finished: " + ("YES" if finished else "NO"))
# Errors
# Collect (event index, message) for ErrorObservation events and for
# terminal observations with a non-zero exit code.
errors = []
for i, e in enumerate(events):
    o = e.get("observation", {}) if isinstance(e.get("observation"), dict) else {}
    if o.get("kind") == "ErrorObservation":
        # Flatten newlines so each error stays on one line of the report.
        errors.append((i, "ERROR: " + str(o.get("text", ""))[:120].replace("\n", " ")))
    if o.get("kind") == "TerminalObservation":
        md = o.get("metadata", {}) or {}
        code = md.get("exit_code")
        # A missing or null exit code is not reported as a failure.
        if code is not None and code != 0:
            errors.append((i, "exit=" + str(code)))
# Time gaps
# Record (event index, seconds) wherever more than 30s passed since the
# previous event.
gaps = []
for i in range(1, len(events)):
    try:
        ta = datetime.fromisoformat(events[i - 1].get("timestamp", ""))
        tb = datetime.fromisoformat(events[i].get("timestamp", ""))
        g = (tb - ta).total_seconds()
    except (TypeError, ValueError):
        # Missing/unparsable timestamps (or naive vs. aware mixes) are skipped,
        # but unrelated errors are no longer swallowed by a bare except.
        continue
    if g > 30:
        gaps.append((i, g))
# Loop detection
# Works on the sequence of action kinds only; every index below is a position
# in action_seq, not an index into events.
action_seq = [akind(e) for e in events if akind(e)]
loops = []
# Try pattern lengths 3..7 (capped at half the sequence length).
for slen in range(3, min(8, len(action_seq) // 2)):
    for start in range(len(action_seq) - slen * 2 + 1):
        seq = action_seq[start:start + slen]
        # A run of one repeated action kind is not treated as a loop pattern.
        if len(set(seq)) <= 1:
            continue
        # Count back-to-back repetitions of seq starting at `start`.
        reps, pos = 1, start + slen
        while pos + slen <= len(action_seq):
            if action_seq[pos:pos + slen] == seq:
                reps += 1
                pos += slen
            else:
                break
        if reps >= 3:
            loops.append((start, slen, reps, seq))
# De-duplicate: longest patterns first, and drop any candidate whose start
# position is already covered by a loop that was kept.
seen = set()
uloops = []
for s, sl, r, sq in sorted(loops, key=lambda x: -x[1]):
    if s not in seen:
        uloops.append((s, sl, r, sq))
        for x in range(s, s + sl * r):
            seen.add(x)
print()
print("--- Health ---")
# Error and gap indices are event indices (they match the timeline numbering).
print("Errors: " + str(len(errors)))
for idx, msg in errors:
    print("  [" + str(idx) + "] " + msg)
print("Gaps >30s: " + str(len(gaps)))
for idx, g in gaps:
    print("  [" + str(idx) + "] " + str(int(g)) + "s")
print("Loops: " + str(len(uloops)))
for s, sl, r, sq in uloops:
    short = [x.replace("Action", "") for x in sq]
    # The printed range is start .. start + length * repetitions, as computed
    # by the loop detection step.
    print("  [" + str(s) + "-" + str(s + sl * r) + "] " + str(short) + " x" + str(r))
# Any of: no FinishAction, a long pause, or a repeated pattern marks the run as suspect.
if not finished or gaps or uloops:
    print("\n!! POSSIBLY STUCK — check above")
else:
    print("\nOK — completed normally")
# Timeline
# One line per event, prefixed with its right-aligned event index.
print()
print("--- Timeline ---")
for i, e in enumerate(events):
    print(str(i).rjust(5) + " " + describe(e, i))
PYEOF
Then run:
python3 /tmp/oh_analyze.py ~/.openhands/conversations/CONVERSATION_ID
Use the output to answer:
- What was the user's request?
- What did the agent actually do? (brief narrative from the timeline)
- Did it complete? (FinishAction present?)
- Were there errors? Did the agent recover or spiral?
- Did it get stuck? (time gaps, loops, no finish, high event count)
- How efficient was it? (duration, LLM turns, ratio of errors to total events)