Skip to content

Instantly share code, notes, and snippets.

@monotykamary
Created May 9, 2026 13:41
Show Gist options
  • Select an option

  • Save monotykamary/67ae4ca870dd31e958889ef842bae033 to your computer and use it in GitHub Desktop.

Select an option

Save monotykamary/67ae4ca870dd31e958889ef842bae033 to your computer and use it in GitHub Desktop.
DuckDB SQL queries for analyzing pi coding agent sessions from JSONL logs
-- Pi HTTP Request Analysis
-- Counts all outbound HTTP requests: assistant API calls + tool calls
INSTALL json;
LOAD json;
-- Create a table from all JSONL session files
CREATE OR REPLACE TABLE events AS
SELECT * FROM read_json_auto('agent/sessions/*/*.jsonl', format='newline_delimited', ignore_errors=true, maximum_object_size=134217728);
-- Extract date from timestamp (read_json_auto parses ISO timestamps automatically)
CREATE OR REPLACE VIEW events_with_date AS
SELECT
*,
timestamp::DATE as event_date,
timestamp::TIMESTAMP as event_ts
FROM events;
-- Count assistant API calls (LLM requests)
CREATE OR REPLACE VIEW assistant_calls AS
SELECT
timestamp,
event_date,
message.provider,
message.model,
message.usage.totalTokens as tokens,
message.usage.cost.total as cost_usd
FROM events_with_date
WHERE type = 'message'
AND message.role = 'assistant'
AND message.usage IS NOT NULL;
-- Count tool calls (web_search, fetch_content, code_search, mcp, bash, read, etc.)
-- Tool calls are logged as toolResult messages
CREATE OR REPLACE VIEW tool_calls AS
SELECT
timestamp,
event_date,
message.toolName as tool_name,
'tool_call' as call_type
FROM events_with_date
WHERE type = 'message'
AND message.role = 'toolResult'
AND message.toolName IS NOT NULL;
-- Combined view of all HTTP requests
CREATE OR REPLACE VIEW all_requests AS
SELECT
event_date,
timestamp,
timestamp::TIMESTAMP as event_ts,
'assistant_api' as request_type,
provider || ':' || model as target,
tokens,
cost_usd
FROM assistant_calls
UNION ALL
SELECT
event_date,
timestamp,
timestamp::TIMESTAMP as event_ts,
'tool_call' as request_type,
tool_name as target,
NULL as tokens,
NULL as cost_usd
FROM tool_calls;
-- Overall totals
SELECT '=== OVERALL REQUEST STATISTICS ===' as section;
SELECT
(SELECT COUNT(*) FROM assistant_calls) as total_assistant_api_calls,
(SELECT COUNT(*) FROM tool_calls) as total_tool_calls,
(SELECT COUNT(*) FROM all_requests) as total_http_requests;
-- By Day (today/yesterday example - adjust date as needed)
SELECT '' as blank;
SELECT '=== REQUESTS BY DAY (most recent 14 days) ===' as section;
SELECT
event_date,
COUNT(*) FILTER (WHERE request_type = 'assistant_api') as assistant_calls,
COUNT(*) FILTER (WHERE request_type = 'tool_call') as tool_calls,
COUNT(*) as total_requests,
ROUND(SUM(cost_usd) FILTER (WHERE request_type = 'assistant_api'), 4) as cost_usd
FROM all_requests
GROUP BY event_date
ORDER BY event_date DESC
LIMIT 14;
-- Averages per day
SELECT '' as blank;
SELECT '=== DAILY AVERAGES ===' as section;
WITH daily_stats AS (
SELECT
event_date,
COUNT(*) as daily_requests,
COUNT(*) FILTER (WHERE request_type = 'assistant_api') as daily_assistant,
COUNT(*) FILTER (WHERE request_type = 'tool_call') as daily_tools
FROM all_requests
GROUP BY event_date
)
SELECT
ROUND(AVG(daily_requests), 2) as avg_total_requests_per_day,
ROUND(AVG(daily_assistant), 2) as avg_assistant_calls_per_day,
ROUND(AVG(daily_tools), 2) as avg_tool_calls_per_day,
COUNT(DISTINCT event_date) as active_days,
MIN(event_date) as first_day,
MAX(event_date) as last_day
FROM daily_stats;
-- Tool call breakdown
SELECT '' as blank;
SELECT '=== TOOL CALLS BY TYPE ===' as section;
SELECT
tool_name,
COUNT(*) as call_count,
ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM tool_calls), 2) as percentage
FROM tool_calls
GROUP BY tool_name
ORDER BY call_count DESC
LIMIT 20;
-- Provider/Model breakdown for assistant calls
SELECT '' as blank;
SELECT '=== ASSISTANT CALLS BY PROVIDER/MODEL ===' as section;
SELECT
provider,
model,
COUNT(*) as call_count,
SUM(tokens)::BIGINT as total_tokens,
ROUND(SUM(cost_usd), 4) as total_cost_usd
FROM assistant_calls
GROUP BY provider, model
ORDER BY call_count DESC
LIMIT 20;
-- Hourly distribution (average per hour of day)
SELECT '' as blank;
SELECT '=== HOURLY DISTRIBUTION (avg per hour) ===' as section;
SELECT
hour_of_day,
ROUND(AVG(hourly_count), 2) as avg_requests,
ROUND(AVG(hourly_assistant), 2) as avg_assistant,
ROUND(AVG(hourly_tools), 2) as avg_tools
FROM (
SELECT
event_date,
EXTRACT(HOUR FROM event_ts)::INTEGER as hour_of_day,
COUNT(*) as hourly_count,
COUNT(*) FILTER (WHERE request_type = 'assistant_api') as hourly_assistant,
COUNT(*) FILTER (WHERE request_type = 'tool_call') as hourly_tools
FROM all_requests
GROUP BY event_date, EXTRACT(HOUR FROM event_ts)::INTEGER
) hourly_data
GROUP BY hour_of_day
ORDER BY hour_of_day;
-- Install and load JSON extension
INSTALL json;
LOAD json;
-- Create a table from all JSONL session files
CREATE OR REPLACE TABLE events AS
SELECT * FROM read_json_auto('agent/sessions/*/*.jsonl', format='newline_delimited', ignore_errors=true, maximum_object_size=134217728);
-- Create view for messages with usage data
CREATE OR REPLACE VIEW messages_with_usage AS
SELECT
timestamp,
message.role as role,
message.usage.input as input_tokens,
message.usage.output as output_tokens,
message.usage.cacheRead as cache_read_tokens,
message.usage.cacheWrite as cache_write_tokens,
COALESCE(message.usage.totalTokens,
message.usage.input + message.usage.output + message.usage.cacheRead + message.usage.cacheWrite) as total_tokens,
message.usage.cost.total as cost_usd,
strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')::DATE as date,
YEAR(strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')) as year,
MONTH(strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')) as month,
WEEK(strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')) as week
FROM events
WHERE type = 'message'
AND message.usage IS NOT NULL
AND (message.usage.input IS NOT NULL OR message.usage.output IS NOT NULL);
-- Overall totals
SELECT '=== OVERALL TOKEN STATISTICS ===' as section;
SELECT
COUNT(*) as total_messages_with_usage,
SUM(input_tokens)::BIGINT as total_input_tokens,
SUM(output_tokens)::BIGINT as total_output_tokens,
SUM(cache_read_tokens)::BIGINT as total_cache_read_tokens,
SUM(cache_write_tokens)::BIGINT as total_cache_write_tokens,
SUM(total_tokens)::BIGINT as total_tokens,
ROUND(SUM(cost_usd), 4) as total_cost_usd
FROM messages_with_usage;
-- By Day
SELECT '' as blank;
SELECT '=== TOKENS BY DAY (top 20) ===' as section;
SELECT
date,
COUNT(*) as messages,
SUM(input_tokens)::BIGINT as input_tokens,
SUM(output_tokens)::BIGINT as output_tokens,
SUM(total_tokens)::BIGINT as total_tokens,
ROUND(SUM(cost_usd), 4) as cost_usd
FROM messages_with_usage
GROUP BY date
ORDER BY total_tokens DESC
LIMIT 20;
-- By Week
SELECT '' as blank;
SELECT '=== TOKENS BY WEEK ===' as section;
SELECT
year,
week,
COUNT(*) as messages,
SUM(input_tokens)::BIGINT as input_tokens,
SUM(output_tokens)::BIGINT as output_tokens,
SUM(total_tokens)::BIGINT as total_tokens,
ROUND(SUM(cost_usd), 4) as cost_usd
FROM messages_with_usage
GROUP BY year, week
ORDER BY year, week;
-- By Month
SELECT '' as blank;
SELECT '=== TOKENS BY MONTH ===' as section;
SELECT
year,
month,
COUNT(*) as messages,
SUM(input_tokens)::BIGINT as input_tokens,
SUM(output_tokens)::BIGINT as output_tokens,
SUM(total_tokens)::BIGINT as total_tokens,
ROUND(SUM(cost_usd), 4) as cost_usd
FROM messages_with_usage
GROUP BY year, month
ORDER BY year, month;
-- Averages
SELECT '' as blank;
SELECT '=== AVERAGES ===' as section;
SELECT
'Per Day (avg)' as metric,
ROUND(SUM(total_tokens) / COUNT(DISTINCT date), 0)::BIGINT as avg_tokens
FROM messages_with_usage
UNION ALL
SELECT
'Per Week (avg)' as metric,
ROUND(SUM(total_tokens) / COUNT(DISTINCT (year || '-' || week)), 0)::BIGINT as avg_tokens
FROM messages_with_usage
UNION ALL
SELECT
'Per Month (avg)' as metric,
ROUND(SUM(total_tokens) / COUNT(DISTINCT (year || '-' || month)), 0)::BIGINT as avg_tokens
FROM messages_with_usage
UNION ALL
SELECT
'Per Message (avg)' as metric,
ROUND(SUM(total_tokens) / COUNT(*), 0)::BIGINT as avg_tokens
FROM messages_with_usage;
-- User Message Statistics Analysis
-- Counts user messages across all pi sessions with breakdowns by date and workspace
INSTALL json;
LOAD json;
-- Create a table from all JSONL session files
CREATE OR REPLACE TABLE events AS
SELECT * FROM read_json_auto('agent/sessions/*/*.jsonl', format='newline_delimited', ignore_errors=true);
-- Track cwd changes and user messages
CREATE OR REPLACE VIEW user_messages AS
WITH session_cwds AS (
SELECT
timestamp,
cwd,
ROW_NUMBER() OVER (ORDER BY timestamp) as session_order
FROM events
WHERE type = 'session'
),
messages_with_cwd AS (
SELECT
e.timestamp,
e.message.role as role,
strptime(e.timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')::DATE as date,
-- Get the most recent cwd before this message
(SELECT cwd FROM session_cwds s
WHERE s.timestamp <= e.timestamp
ORDER BY s.timestamp DESC LIMIT 1) as cwd
FROM events e
WHERE e.type = 'message'
AND e.message.role = 'user'
)
SELECT * FROM messages_with_cwd;
-- Overall statistics
SELECT '=== USER MESSAGE STATISTICS ===' as section;
SELECT
COUNT(*) as total_user_messages,
COUNT(DISTINCT date) as active_days,
MIN(date) as first_date,
MAX(date) as last_date
FROM user_messages;
-- By date
SELECT '' as blank;
SELECT '=== BY DATE ===' as section;
SELECT
date,
COUNT(*) as messages,
COUNT(DISTINCT cwd) as workspaces
FROM user_messages
GROUP BY date
ORDER BY date;
-- Summary stats by date range
SELECT '' as blank;
SELECT '=== SUMMARY STATISTICS ===' as section;
WITH date_stats AS (
SELECT
COUNT(*) as total,
COUNT(DISTINCT date) as active_days,
MIN(date) as first_date,
MAX(date) as last_date
FROM user_messages
)
SELECT
total as total_user_messages,
active_days,
ROUND(total::DOUBLE / active_days, 2) as avg_per_active_day,
first_date,
last_date,
(MAX(date) - MIN(date) + 1) as days_in_range,
ROUND(total::DOUBLE / (MAX(date) - MIN(date) + 1), 2) as avg_over_all_days
FROM date_stats, user_messages
GROUP BY total, active_days, first_date, last_date;
-- Top workspaces
SELECT '' as blank;
SELECT '=== TOP WORKSPACES ===' as section;
SELECT
cwd,
COUNT(*) as messages
FROM user_messages
GROUP BY cwd
ORDER BY messages DESC
LIMIT 15;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment