Last active
September 27, 2022 14:47
-
-
Save jexp/255dad7bc6328060e20fdb937b950056 to your computer and use it in GitHub Desktop.
Cypher Optimization for aggregation and compound indexes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
unwind range(2010,2022) as year unwind range(1,365) as day create (d:Date {year:year, day:day}); | |
// Added 4745 labels, created 4745 nodes, set 9490 properties, completed after 194 ms. | |
create index for (d:Date) on (d.year); | |
:auto unwind range(2010,2022) as year unwind range(1,365) as day match (d:Date {year:year, day:day}) | |
unwind range(1, 1000) as id | |
call { with id,d | |
create (l:LineItem {revenue:rand()*1000, quantity:toInteger(rand()*100)}) | |
create (l)-[:ON_DATE]->(d) | |
} in transactions of 50000 rows; | |
// Added 4745000 labels, created 4745000 nodes, set 9490000 properties, created 4745000 relationships, completed after 61032 ms. | |
create index for (l:LineItem) on (l.quantity, l.revenue); | |
profile | |
match (d:Date {year :2013}) | |
match (l:LineItem)-[:ON_DATE]->(d) | |
using join on l | |
using index l:LineItem(quantity,revenue) | |
where l.quantity > 15 and l.revenue is not null | |
return sum (l.revenue); | |
// Interestingly using join l is much faster than on d | |
// possibly because there a fewer date nodes (365) that need to be iterated and checked against the line-item set | |
/* | |
╒══════════════════╕ | |
│"sum (l.revenue)" │ | |
╞══════════════════╡ | |
│153165258.38816753│ | |
└──────────────────┘ | |
*/ | |
// Cypher version: , planner: COST, runtime: PIPELINED. 4351817 total db hits in 1428 ms. | |
profile | |
match (d:Date {year :2013}) | |
match (l:LineItem)-[:ON_DATE]->(d) | |
using join on l | |
using index l:LineItem(quantity,revenue) | |
where l.quantity > 15 and l.revenue is not null | |
return sum (l.revenue); | |
/* | |
+----------------------+ | |
| sum (l.revenue) | | |
+----------------------+ | |
| 1.5316525838816935E8 | | |
+----------------------+ | |
+----------------------------------------------------------------------------------------------------+ | |
| Plan | Statement | Version | Planner | Runtime | Time | DbHits | Rows | Memory (Bytes) | | |
+----------------------------------------------------------------------------------------------------+ | |
| "PROFILE" | "READ_ONLY" | "" | "COST" | "PIPELINED" | 1386 | 4351817 | 1 | 850136944 | | |
+----------------------------------------------------------------------------------------------------+ | |
Planner COST | |
Runtime PIPELINED | |
Runtime version 5.0 | |
Batch size 1024 | |
+-------------------+------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+ | |
| Operator | Details | Estimated Rows | Rows | DB Hits | Memory (Bytes) | Page Cache Hits/Misses | Time (ms) | Pipeline | | |
+-------------------+------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+ | |
| +ProduceResults | `sum (l.revenue)` | 1 | 1 | 0 | | 0/0 | 0,035 | | | |
| | +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+ | | |
| +EagerAggregation | sum(cache[l.revenue]) AS `sum (l.revenue)` | 1 | 1 | 0 | 32 | 0/0 | 24,010 | In Pipeline 3 | | |
| | +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+ | |
| +NodeHashJoin | l | 10466 | 306596 | 0 | 850136712 | | 76,525 | In Pipeline 2 | | |
| |\ +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+ | |
| | +Expand(All) | (d)<-[anon_0:ON_DATE]-(l) | 365000 | 365000 | 365730 | | | | | | |
| | | +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+ | | | | |
| | +NodeIndexSeek | RANGE INDEX d:Date(year) WHERE year = $autoint_0 | 365 | 365 | 366 | 120 | 284623/0 | 20,257 | Fused in Pipeline 1 | | |
| | +------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+ | |
| +NodeIndexSeek | RANGE INDEX l:LineItem(quantity, revenue) WHERE quantity > $autoint_1 AND revenue IS NOT NULL, cache | 136055 | 3985720 | 3985721 | 120 | 22464/0 | 371,271 | In Pipeline 0 | | |
| | [l.revenue] | | | | | | | | | |
+-------------------+------------------------------------------------------------------------------------------------------+----------------+---------+---------+----------------+------------------------+-----------+---------------------+ | |
Total database accesses: 4351817, total allocated memory: 850136944 | |
1 row | |
ready to start consuming query after 3 ms, results consumed after another 1383 ms | |
*/ | |
// but join on l is not as parallelizable as on d | |
cypher runtime=parallel match (d:Date {year :2013}) | |
match (l:LineItem)-[:ON_DATE]->(d) | |
using join on d | |
using index l:LineItem(quantity,revenue) | |
where l.quantity > 15 and l.revenue is not null | |
return sum (l.revenue); | |
/* | |
+----------------------+ | |
| sum (l.revenue) | | |
+----------------------+ | |
| 1.5316525838816625E8 | | |
+----------------------+ | |
1 row | |
ready to start consuming query after 3 ms, results consumed after another 487 ms | |
*/ | |
// cleanup | |
// :auto profile match (n) call { with n detach delete n } in transactions of 100000 rows; | |
// Deleted 9494745 nodes, deleted 4745000 relationships, completed after 160522 ms. | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Original question & answer here:
https://community.neo4j.com/t5/neo4j-graph-platform/query-relationship-poor-performance/m-p/60615#M35907
Page cache config: 4G
Heap config: 4G
Neo4j 5.0.0
Java 19