Created
May 9, 2026 13:41
-
-
Save monotykamary/67ae4ca870dd31e958889ef842bae033 to your computer and use it in GitHub Desktop.
DuckDB SQL queries for analyzing pi coding agent sessions from JSONL logs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- Pi HTTP Request Analysis | |
| -- Counts all outbound HTTP requests: assistant API calls + tool calls | |
| INSTALL json; | |
| LOAD json; | |
| -- Create a table from all JSONL session files | |
| CREATE OR REPLACE TABLE events AS | |
| SELECT * FROM read_json_auto('agent/sessions/*/*.jsonl', format='newline_delimited', ignore_errors=true, maximum_object_size=134217728); | |
| -- Extract date from timestamp (read_json_auto parses ISO timestamps automatically) | |
| CREATE OR REPLACE VIEW events_with_date AS | |
| SELECT | |
| *, | |
| timestamp::DATE as event_date, | |
| timestamp::TIMESTAMP as event_ts | |
| FROM events; | |
| -- Count assistant API calls (LLM requests) | |
| CREATE OR REPLACE VIEW assistant_calls AS | |
| SELECT | |
| timestamp, | |
| event_date, | |
| message.provider, | |
| message.model, | |
| message.usage.totalTokens as tokens, | |
| message.usage.cost.total as cost_usd | |
| FROM events_with_date | |
| WHERE type = 'message' | |
| AND message.role = 'assistant' | |
| AND message.usage IS NOT NULL; | |
| -- Count tool calls (web_search, fetch_content, code_search, mcp, bash, read, etc.) | |
| -- Tool calls are logged as toolResult messages | |
| CREATE OR REPLACE VIEW tool_calls AS | |
| SELECT | |
| timestamp, | |
| event_date, | |
| message.toolName as tool_name, | |
| 'tool_call' as call_type | |
| FROM events_with_date | |
| WHERE type = 'message' | |
| AND message.role = 'toolResult' | |
| AND message.toolName IS NOT NULL; | |
| -- Combined view of all HTTP requests | |
| CREATE OR REPLACE VIEW all_requests AS | |
| SELECT | |
| event_date, | |
| timestamp, | |
| timestamp::TIMESTAMP as event_ts, | |
| 'assistant_api' as request_type, | |
| provider || ':' || model as target, | |
| tokens, | |
| cost_usd | |
| FROM assistant_calls | |
| UNION ALL | |
| SELECT | |
| event_date, | |
| timestamp, | |
| timestamp::TIMESTAMP as event_ts, | |
| 'tool_call' as request_type, | |
| tool_name as target, | |
| NULL as tokens, | |
| NULL as cost_usd | |
| FROM tool_calls; | |
| -- Overall totals | |
| SELECT '=== OVERALL REQUEST STATISTICS ===' as section; | |
| SELECT | |
| (SELECT COUNT(*) FROM assistant_calls) as total_assistant_api_calls, | |
| (SELECT COUNT(*) FROM tool_calls) as total_tool_calls, | |
| (SELECT COUNT(*) FROM all_requests) as total_http_requests; | |
| -- By Day (today/yesterday example - adjust date as needed) | |
| SELECT '' as blank; | |
| SELECT '=== REQUESTS BY DAY (most recent 14 days) ===' as section; | |
| SELECT | |
| event_date, | |
| COUNT(*) FILTER (WHERE request_type = 'assistant_api') as assistant_calls, | |
| COUNT(*) FILTER (WHERE request_type = 'tool_call') as tool_calls, | |
| COUNT(*) as total_requests, | |
| ROUND(SUM(cost_usd) FILTER (WHERE request_type = 'assistant_api'), 4) as cost_usd | |
| FROM all_requests | |
| GROUP BY event_date | |
| ORDER BY event_date DESC | |
| LIMIT 14; | |
| -- Averages per day | |
| SELECT '' as blank; | |
| SELECT '=== DAILY AVERAGES ===' as section; | |
| WITH daily_stats AS ( | |
| SELECT | |
| event_date, | |
| COUNT(*) as daily_requests, | |
| COUNT(*) FILTER (WHERE request_type = 'assistant_api') as daily_assistant, | |
| COUNT(*) FILTER (WHERE request_type = 'tool_call') as daily_tools | |
| FROM all_requests | |
| GROUP BY event_date | |
| ) | |
| SELECT | |
| ROUND(AVG(daily_requests), 2) as avg_total_requests_per_day, | |
| ROUND(AVG(daily_assistant), 2) as avg_assistant_calls_per_day, | |
| ROUND(AVG(daily_tools), 2) as avg_tool_calls_per_day, | |
| COUNT(DISTINCT event_date) as active_days, | |
| MIN(event_date) as first_day, | |
| MAX(event_date) as last_day | |
| FROM daily_stats; | |
| -- Tool call breakdown | |
| SELECT '' as blank; | |
| SELECT '=== TOOL CALLS BY TYPE ===' as section; | |
| SELECT | |
| tool_name, | |
| COUNT(*) as call_count, | |
| ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM tool_calls), 2) as percentage | |
| FROM tool_calls | |
| GROUP BY tool_name | |
| ORDER BY call_count DESC | |
| LIMIT 20; | |
| -- Provider/Model breakdown for assistant calls | |
| SELECT '' as blank; | |
| SELECT '=== ASSISTANT CALLS BY PROVIDER/MODEL ===' as section; | |
| SELECT | |
| provider, | |
| model, | |
| COUNT(*) as call_count, | |
| SUM(tokens)::BIGINT as total_tokens, | |
| ROUND(SUM(cost_usd), 4) as total_cost_usd | |
| FROM assistant_calls | |
| GROUP BY provider, model | |
| ORDER BY call_count DESC | |
| LIMIT 20; | |
| -- Hourly distribution (average per hour of day) | |
| SELECT '' as blank; | |
| SELECT '=== HOURLY DISTRIBUTION (avg per hour) ===' as section; | |
| SELECT | |
| hour_of_day, | |
| ROUND(AVG(hourly_count), 2) as avg_requests, | |
| ROUND(AVG(hourly_assistant), 2) as avg_assistant, | |
| ROUND(AVG(hourly_tools), 2) as avg_tools | |
| FROM ( | |
| SELECT | |
| event_date, | |
| EXTRACT(HOUR FROM event_ts)::INTEGER as hour_of_day, | |
| COUNT(*) as hourly_count, | |
| COUNT(*) FILTER (WHERE request_type = 'assistant_api') as hourly_assistant, | |
| COUNT(*) FILTER (WHERE request_type = 'tool_call') as hourly_tools | |
| FROM all_requests | |
| GROUP BY event_date, EXTRACT(HOUR FROM event_ts)::INTEGER | |
| ) hourly_data | |
| GROUP BY hour_of_day | |
| ORDER BY hour_of_day; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- Install and load JSON extension | |
| INSTALL json; | |
| LOAD json; | |
| -- Create a table from all JSONL session files | |
| CREATE OR REPLACE TABLE events AS | |
| SELECT * FROM read_json_auto('agent/sessions/*/*.jsonl', format='newline_delimited', ignore_errors=true, maximum_object_size=134217728); | |
| -- Create view for messages with usage data | |
| CREATE OR REPLACE VIEW messages_with_usage AS | |
| SELECT | |
| timestamp, | |
| message.role as role, | |
| message.usage.input as input_tokens, | |
| message.usage.output as output_tokens, | |
| message.usage.cacheRead as cache_read_tokens, | |
| message.usage.cacheWrite as cache_write_tokens, | |
| COALESCE(message.usage.totalTokens, | |
| message.usage.input + message.usage.output + message.usage.cacheRead + message.usage.cacheWrite) as total_tokens, | |
| message.usage.cost.total as cost_usd, | |
| strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')::DATE as date, | |
| YEAR(strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')) as year, | |
| MONTH(strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')) as month, | |
| WEEK(strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')) as week | |
| FROM events | |
| WHERE type = 'message' | |
| AND message.usage IS NOT NULL | |
| AND (message.usage.input IS NOT NULL OR message.usage.output IS NOT NULL); | |
| -- Overall totals | |
| SELECT '=== OVERALL TOKEN STATISTICS ===' as section; | |
| SELECT | |
| COUNT(*) as total_messages_with_usage, | |
| SUM(input_tokens)::BIGINT as total_input_tokens, | |
| SUM(output_tokens)::BIGINT as total_output_tokens, | |
| SUM(cache_read_tokens)::BIGINT as total_cache_read_tokens, | |
| SUM(cache_write_tokens)::BIGINT as total_cache_write_tokens, | |
| SUM(total_tokens)::BIGINT as total_tokens, | |
| ROUND(SUM(cost_usd), 4) as total_cost_usd | |
| FROM messages_with_usage; | |
| -- By Day | |
| SELECT '' as blank; | |
| SELECT '=== TOKENS BY DAY (top 20) ===' as section; | |
| SELECT | |
| date, | |
| COUNT(*) as messages, | |
| SUM(input_tokens)::BIGINT as input_tokens, | |
| SUM(output_tokens)::BIGINT as output_tokens, | |
| SUM(total_tokens)::BIGINT as total_tokens, | |
| ROUND(SUM(cost_usd), 4) as cost_usd | |
| FROM messages_with_usage | |
| GROUP BY date | |
| ORDER BY total_tokens DESC | |
| LIMIT 20; | |
| -- By Week | |
| SELECT '' as blank; | |
| SELECT '=== TOKENS BY WEEK ===' as section; | |
| SELECT | |
| year, | |
| week, | |
| COUNT(*) as messages, | |
| SUM(input_tokens)::BIGINT as input_tokens, | |
| SUM(output_tokens)::BIGINT as output_tokens, | |
| SUM(total_tokens)::BIGINT as total_tokens, | |
| ROUND(SUM(cost_usd), 4) as cost_usd | |
| FROM messages_with_usage | |
| GROUP BY year, week | |
| ORDER BY year, week; | |
| -- By Month | |
| SELECT '' as blank; | |
| SELECT '=== TOKENS BY MONTH ===' as section; | |
| SELECT | |
| year, | |
| month, | |
| COUNT(*) as messages, | |
| SUM(input_tokens)::BIGINT as input_tokens, | |
| SUM(output_tokens)::BIGINT as output_tokens, | |
| SUM(total_tokens)::BIGINT as total_tokens, | |
| ROUND(SUM(cost_usd), 4) as cost_usd | |
| FROM messages_with_usage | |
| GROUP BY year, month | |
| ORDER BY year, month; | |
| -- Averages | |
| SELECT '' as blank; | |
| SELECT '=== AVERAGES ===' as section; | |
| SELECT | |
| 'Per Day (avg)' as metric, | |
| ROUND(SUM(total_tokens) / COUNT(DISTINCT date), 0)::BIGINT as avg_tokens | |
| FROM messages_with_usage | |
| UNION ALL | |
| SELECT | |
| 'Per Week (avg)' as metric, | |
| ROUND(SUM(total_tokens) / COUNT(DISTINCT (year || '-' || week)), 0)::BIGINT as avg_tokens | |
| FROM messages_with_usage | |
| UNION ALL | |
| SELECT | |
| 'Per Month (avg)' as metric, | |
| ROUND(SUM(total_tokens) / COUNT(DISTINCT (year || '-' || month)), 0)::BIGINT as avg_tokens | |
| FROM messages_with_usage | |
| UNION ALL | |
| SELECT | |
| 'Per Message (avg)' as metric, | |
| ROUND(SUM(total_tokens) / COUNT(*), 0)::BIGINT as avg_tokens | |
| FROM messages_with_usage; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- User Message Statistics Analysis | |
| -- Counts user messages across all pi sessions with breakdowns by date and workspace | |
| INSTALL json; | |
| LOAD json; | |
| -- Create a table from all JSONL session files | |
| CREATE OR REPLACE TABLE events AS | |
| SELECT * FROM read_json_auto('agent/sessions/*/*.jsonl', format='newline_delimited', ignore_errors=true); | |
| -- Track cwd changes and user messages | |
| CREATE OR REPLACE VIEW user_messages AS | |
| WITH session_cwds AS ( | |
| SELECT | |
| timestamp, | |
| cwd, | |
| ROW_NUMBER() OVER (ORDER BY timestamp) as session_order | |
| FROM events | |
| WHERE type = 'session' | |
| ), | |
| messages_with_cwd AS ( | |
| SELECT | |
| e.timestamp, | |
| e.message.role as role, | |
| strptime(e.timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')::DATE as date, | |
| -- Get the most recent cwd before this message | |
| (SELECT cwd FROM session_cwds s | |
| WHERE s.timestamp <= e.timestamp | |
| ORDER BY s.timestamp DESC LIMIT 1) as cwd | |
| FROM events e | |
| WHERE e.type = 'message' | |
| AND e.message.role = 'user' | |
| ) | |
| SELECT * FROM messages_with_cwd; | |
| -- Overall statistics | |
| SELECT '=== USER MESSAGE STATISTICS ===' as section; | |
| SELECT | |
| COUNT(*) as total_user_messages, | |
| COUNT(DISTINCT date) as active_days, | |
| MIN(date) as first_date, | |
| MAX(date) as last_date | |
| FROM user_messages; | |
| -- By date | |
| SELECT '' as blank; | |
| SELECT '=== BY DATE ===' as section; | |
| SELECT | |
| date, | |
| COUNT(*) as messages, | |
| COUNT(DISTINCT cwd) as workspaces | |
| FROM user_messages | |
| GROUP BY date | |
| ORDER BY date; | |
| -- Summary stats by date range | |
| SELECT '' as blank; | |
| SELECT '=== SUMMARY STATISTICS ===' as section; | |
| WITH date_stats AS ( | |
| SELECT | |
| COUNT(*) as total, | |
| COUNT(DISTINCT date) as active_days, | |
| MIN(date) as first_date, | |
| MAX(date) as last_date | |
| FROM user_messages | |
| ) | |
| SELECT | |
| total as total_user_messages, | |
| active_days, | |
| ROUND(total::DOUBLE / active_days, 2) as avg_per_active_day, | |
| first_date, | |
| last_date, | |
| (MAX(date) - MIN(date) + 1) as days_in_range, | |
| ROUND(total::DOUBLE / (MAX(date) - MIN(date) + 1), 2) as avg_over_all_days | |
| FROM date_stats, user_messages | |
| GROUP BY total, active_days, first_date, last_date; | |
| -- Top workspaces | |
| SELECT '' as blank; | |
| SELECT '=== TOP WORKSPACES ===' as section; | |
| SELECT | |
| cwd, | |
| COUNT(*) as messages | |
| FROM user_messages | |
| GROUP BY cwd | |
| ORDER BY messages DESC | |
| LIMIT 15; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment