Created
April 4, 2026 08:27
-
-
Save Florents-Tselai/97736306920396cb5d4ae339c551ea59 to your computer and use it in GitHub Desktop.
JSONPath string methods benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- JSONPath string methods benchmark | |
| -- Demonstrates the "Deconstruction Tax" on arrays vs scalar access | |
| -- Run with: psql -f jsonpath_string_methods_bench.sql | |
| -- Requires PostgreSQL 19+ | |
| \timing on | |
| \set ECHO all | |
| -- ============================================================ | |
| -- SETUP | |
| -- ============================================================ | |
| DROP TABLE IF EXISTS bench_scalar, bench_array; | |
| -- Scalar benchmark: one name field per document | |
| CREATE TABLE bench_scalar AS | |
| SELECT | |
| id, | |
| jsonb_build_object('name', 'John_' || id::text) AS data | |
| FROM generate_series(1, 1_000_000) id; | |
| -- Array benchmark: each document holds 10 tags | |
| CREATE TABLE bench_array AS | |
| SELECT | |
| id, | |
| jsonb_build_object( | |
| 'tags', ( | |
| SELECT jsonb_agg('tag_Value_' || (id * 10 + s)::text) | |
| FROM generate_series(1, 10) s | |
| ) | |
| ) AS data | |
| FROM generate_series(1, 100_000) id; | |
| -- Warm the cache | |
| SELECT count(*) FROM bench_scalar; | |
| SELECT count(*) FROM bench_array; | |
| -- ============================================================ | |
| -- PART 1: SCALAR — expect similar cost both ways | |
| -- ============================================================ | |
| \echo '' | |
| \echo '=== SCALAR: classic extraction + lower() ===' | |
| SELECT lower(data->>'name') | |
| FROM bench_scalar | |
| LIMIT 1000000; | |
| \echo '' | |
| \echo '=== SCALAR: jsonpath $.name.lower() ===' | |
| SELECT jsonb_path_query_first(data, '$.name.lower()') | |
| FROM bench_scalar | |
| LIMIT 1000000; | |
| -- ============================================================ | |
| -- PART 2: ARRAYS — where the Deconstruction Tax kicks in | |
| -- ============================================================ | |
| \echo '' | |
| \echo '=== ARRAY: classic unnest + lower() + reaggregate ===' | |
| SELECT | |
| id, | |
| jsonb_agg(lower(tag #>> '{}')) -- #>> '{}' extracts scalar text from a jsonb string element | |
| FROM bench_array, | |
| jsonb_array_elements(data->'tags') AS tag | |
| GROUP BY id | |
| LIMIT 100000; | |
| \echo '' | |
| \echo '=== ARRAY: jsonpath $.tags[*].lower() — single loop, no materialization ===' | |
| SELECT | |
| id, | |
| jsonb_path_query_array(data, '$.tags[*].lower()') | |
| FROM bench_array | |
| LIMIT 100000; | |
| -- ============================================================ | |
| -- PART 3: NESTED STRUCTURE — deeper nesting amplifies the tax | |
| -- ============================================================ | |
| DROP TABLE IF EXISTS bench_nested; | |
| CREATE TABLE bench_nested AS | |
| SELECT | |
| id, | |
| jsonb_build_object( | |
| 'users', ( | |
| SELECT jsonb_agg( | |
| jsonb_build_object( | |
| 'name', 'Alice_' || (id * 5 + s)::text, | |
| 'city', 'New_York_' || s::text | |
| ) | |
| ) | |
| FROM generate_series(1, 5) s | |
| ) | |
| ) AS data | |
| FROM generate_series(1, 100_000) id; | |
| \echo '' | |
| \echo '=== NESTED: classic path — two unnests + reaggregate ===' | |
| SELECT | |
| id, | |
| jsonb_agg(lower(u->>'name') || ', ' || initcap(u->>'city')) | |
| FROM bench_nested, | |
| jsonb_array_elements(data->'users') AS u | |
| GROUP BY id | |
| LIMIT 100000; | |
| \echo '' | |
| \echo '=== NESTED: jsonpath — single pass, lower() + initcap() inline ===' | |
| SELECT | |
| id, | |
| jsonb_path_query_array(data, '$.users[*].name.lower()') | |
| FROM bench_nested | |
| LIMIT 100000; | |
| -- ============================================================ | |
| -- CLEANUP | |
| -- ============================================================ | |
| DROP TABLE bench_scalar, bench_array, bench_nested; |
Author
Author
| Test | Classic SQL | JSONPath | Speedup |
|---|---|---|---|
| Scalar (1M rows, single key) | 184 ms | 315 ms | JSONPath 1.7× slower |
| Array (100K docs × 10 elements) | 717 ms | 174 ms | JSONPath 4.1× faster |
| Nested (100K docs × 5 objects) | 528 ms | 114 ms | JSONPath 4.6× faster |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
\echo '=== SCALAR: classic extraction + lower() ==='
=== SCALAR: classic extraction + lower() ===
Time: 184.130 ms
\echo '=== SCALAR: jsonpath $.name.lower() ==='
=== SCALAR: jsonpath $.name.lower() ===
Time: 315.407 ms
-- ============================================================
-- ============================================================
\echo '=== ARRAY: classic unnest + lower() + reaggregate ==='
=== ARRAY: classic unnest + lower() + reaggregate ===
Time: 717.719 ms
\echo '=== ARRAY: jsonpath $.tags[].lower() — single loop, no materialization ==='
=== ARRAY: jsonpath $.tags[].lower() — single loop, no materialization ===
Time: 174.326 ms
-- ============================================================
-- ============================================================
Time: 0.155 ms
Time: 543.764 ms
\echo '=== NESTED: classic path — two unnests + reaggregate ==='
=== NESTED: classic path — two unnests + reaggregate ===
Time: 528.621 ms
\echo '=== NESTED: jsonpath — single pass, lower() + initcap() inline ==='
=== NESTED: jsonpath — single pass, lower() + initcap() inline ===
Time: 114.284 ms