jexp · September 27, 2022 14:47 · jexp · Sep 27, 2022
diff --git a/aggregation-index.cypher b/aggregation-index.cypher
 unwind range(2010,2022) as year unwind range(1,365) as day create (d:Date {year:year, day:day});
 // Added 4745 labels, created 4745 nodes, set 9490 properties, completed after 194 ms.
 create index for (d:Date) on (d.year);


 :auto unwind range(2010,2022) as year unwind range(1,365) as day match (d:Date {year:year, day:day})
 unwind range(1, 1000) as id
 call { with id,d
    create (l:LineItem {revenue:rand()*1000, quantity:toInteger(rand()*100)})
    create (l)-[:ON_DATE]->(d)
 } in transactions of 50000 rows;
 // Added 4745000 labels, created 4745000 nodes, set 9490000 properties, created 4745000 relationships, completed after 61032 ms.

 create index for (l:LineItem) on (l.quantity, l.revenue);

 profile
 match (d:Date {year :2013}) 
 match (l:LineItem)-[:ON_DATE]->(d)
 using join on l
 using index l:LineItem(quantity,revenue)
 where l.quantity > 15 and l.revenue is not null
 return sum (l.revenue);

 // Interestingly using join l is much faster than on d
 // possibly because there a fewer date nodes (365) that need to be iterated and checked against the line-item set

 /*
 ╒══════════════════╕
 │"sum (l.revenue)" │
 ╞══════════════════╡
 │153165258.38816753│
 └──────────────────┘
 */ 
 // Cypher version: , planner: COST, runtime: PIPELINED. 4351817 total db hits in 1428 ms.

 profile
 match (d:Date {year :2013}) 
 match (l:LineItem)-[:ON_DATE]->(d)
 using join on l
 using index l:LineItem(quantity,revenue)
 where l.quantity > 15 and l.revenue is not null
 return sum (l.revenue);
 /*
 +----------------------+
 | sum (l.revenue)      |
 +----------------------+
 | 1.5316525838816935E8 |
 +----------------------+

 +----------------------------------------------------------------------------------------------------+
 | Plan      | Statement   | Version | Planner | Runtime     | Time | DbHits  | Rows | Memory (Bytes) |
 +----------------------------------------------------------------------------------------------------+
 | "PROFILE" | "READ_ONLY" | ""      | "COST"  | "PIPELINED" | 1386 | 4351817 | 1    | 850136944      |
 +----------------------------------------------------------------------------------------------------+


 Planner COST

 Runtime PIPELINED

 Runtime version 5.0

 Batch size 1024

 +-------------------+------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+
 | Operator          | Details                                                                                              | Estimated Rows | Rows    | DB Hits | Memory (Bytes) | Page Cache Hits/Misses | Time (ms) | Pipeline            |
 +-------------------+------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+
 | +ProduceResults   | `sum (l.revenue)`                                                                                    |              1 |       1 |       0 |                |                    0/0 |     0,035 |                     |
 | |                 +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+                     |
 | +EagerAggregation | sum(cache[l.revenue]) AS `sum (l.revenue)`                                                           |              1 |       1 |       0 |             32 |                    0/0 |    24,010 | In Pipeline 3       |
 | |                 +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+
 | +NodeHashJoin     | l                                                                                                    |          10466 |  306596 |       0 |      850136712 |                        |    76,525 | In Pipeline 2       |
 | |\                +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+
 | | +Expand(All)    | (d)<-[anon_0:ON_DATE]-(l)                                                                            |         365000 |  365000 |  365730 |                |                        |           |                     |
 | | |               +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+                        |           |                     |
 | | +NodeIndexSeek  | RANGE INDEX d:Date(year) WHERE year = $autoint_0                                                     |            365 |     365 |     366 |            120 |               284623/0 |    20,257 | Fused in Pipeline 1 |
 | |                 +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+
 | +NodeIndexSeek    | RANGE INDEX l:LineItem(quantity, revenue) WHERE quantity > $autoint_1 AND revenue IS NOT NULL, cache |         136055 | 3985720 | 3985721 |            120 |                22464/0 |   371,271 | In Pipeline 0       |
 |                   | [l.revenue]                                                                                          |                |         |         |                |                        |           |                     |
 +-------------------+------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+

 Total database accesses: 4351817, total allocated memory: 850136944

 1 row
 ready to start consuming query after 3 ms, results consumed after another 1383 ms
 */

 // but join on l is not as parallelizable as on d

 cypher runtime=parallel match (d:Date {year :2013}) 
 match (l:LineItem)-[:ON_DATE]->(d)
 using join on d
 using index l:LineItem(quantity,revenue)
 where l.quantity > 15 and l.revenue is not null
 return sum (l.revenue);

 /*
 +----------------------+
 | sum (l.revenue)      |
 +----------------------+
 | 1.5316525838816625E8 |
 +----------------------+

 1 row
 ready to start consuming query after 3 ms, results consumed after another 487 ms
 */

 // cleanup
 // :auto profile match (n) call { with n detach delete n } in transactions of 100000 rows;
 // Deleted 9494745 nodes, deleted 4745000 relationships, completed after 160522 ms.
	unwind range(2010,2022) as year unwind range(1,365) as day create (d:Date {year:year, day:day});
	// Added 4745 labels, created 4745 nodes, set 9490 properties, completed after 194 ms.
	create index for (d:Date) on (d.year);


	:auto unwind range(2010,2022) as year unwind range(1,365) as day match (d:Date {year:year, day:day})
	unwind range(1, 1000) as id
	call { with id,d
	create (l:LineItem {revenue:rand()1000, quantity:toInteger(rand()100)})
	create (l)-[:ON_DATE]->(d)
	} in transactions of 50000 rows;
	// Added 4745000 labels, created 4745000 nodes, set 9490000 properties, created 4745000 relationships, completed after 61032 ms.

	create index for (l:LineItem) on (l.quantity, l.revenue);

	profile
	match (d:Date {year :2013})
	match (l:LineItem)-[:ON_DATE]->(d)
	using join on l
	using index l:LineItem(quantity,revenue)
	where l.quantity > 15 and l.revenue is not null
	return sum (l.revenue);

	// Interestingly using join l is much faster than on d
	// possibly because there a fewer date nodes (365) that need to be iterated and checked against the line-item set

	/*
	╒══════════════════╕
	│"sum (l.revenue)" │
	╞══════════════════╡
	│153165258.38816753│
	└──────────────────┘
	*/
	// Cypher version: , planner: COST, runtime: PIPELINED. 4351817 total db hits in 1428 ms.

	profile
	match (d:Date {year :2013})
	match (l:LineItem)-[:ON_DATE]->(d)
	using join on l
	using index l:LineItem(quantity,revenue)
	where l.quantity > 15 and l.revenue is not null
	return sum (l.revenue);
	/*
	+----------------------+
	\| sum (l.revenue) \|
	+----------------------+
	\| 1.5316525838816935E8 \|
	+----------------------+

	+----------------------------------------------------------------------------------------------------+
	\| Plan \| Statement \| Version \| Planner \| Runtime \| Time \| DbHits \| Rows \| Memory (Bytes) \|
	+----------------------------------------------------------------------------------------------------+
	\| "PROFILE" \| "READ_ONLY" \| "" \| "COST" \| "PIPELINED" \| 1386 \| 4351817 \| 1 \| 850136944 \|
	+----------------------------------------------------------------------------------------------------+


	Planner COST

	Runtime PIPELINED

	Runtime version 5.0

	Batch size 1024

	+-------------------+------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+
	\| Operator \| Details \| Estimated Rows \| Rows \| DB Hits \| Memory (Bytes) \| Page Cache Hits/Misses \| Time (ms) \| Pipeline \|
	+-------------------+------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+
	\| +ProduceResults \| `sum (l.revenue)` \| 1 \| 1 \| 0 \| \| 0/0 \| 0,035 \| \|
	\| \| +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+ \|
	\| +EagerAggregation \| sum(cache[l.revenue]) AS `sum (l.revenue)` \| 1 \| 1 \| 0 \| 32 \| 0/0 \| 24,010 \| In Pipeline 3 \|
	\| \| +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+
	\| +NodeHashJoin \| l \| 10466 \| 306596 \| 0 \| 850136712 \| \| 76,525 \| In Pipeline 2 \|
	\| \|\ +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+
	\| \| +Expand(All) \| (d)<-[anon_0:ON_DATE]-(l) \| 365000 \| 365000 \| 365730 \| \| \| \| \|
	\| \| \| +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+ \| \| \|
	\| \| +NodeIndexSeek \| RANGE INDEX d:Date(year) WHERE year = $autoint_0 \| 365 \| 365 \| 366 \| 120 \| 284623/0 \| 20,257 \| Fused in Pipeline 1 \|
	\| \| +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+
	\| +NodeIndexSeek \| RANGE INDEX l:LineItem(quantity, revenue) WHERE quantity > $autoint_1 AND revenue IS NOT NULL, cache \| 136055 \| 3985720 \| 3985721 \| 120 \| 22464/0 \| 371,271 \| In Pipeline 0 \|
	\| \| [l.revenue] \| \| \| \| \| \| \| \|
	+-------------------+------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+

	Total database accesses: 4351817, total allocated memory: 850136944

	1 row
	ready to start consuming query after 3 ms, results consumed after another 1383 ms
	*/

	// but join on l is not as parallelizable as on d

	cypher runtime=parallel match (d:Date {year :2013})
	match (l:LineItem)-[:ON_DATE]->(d)
	using join on d
	using index l:LineItem(quantity,revenue)
	where l.quantity > 15 and l.revenue is not null
	return sum (l.revenue);

	/*
	+----------------------+
	\| sum (l.revenue) \|
	+----------------------+
	\| 1.5316525838816625E8 \|
	+----------------------+

	1 row
	ready to start consuming query after 3 ms, results consumed after another 487 ms
	*/

	// cleanup
	// :auto profile match (n) call { with n detach delete n } in transactions of 100000 rows;
	// Deleted 9494745 nodes, deleted 4745000 relationships, completed after 160522 ms.