Last active
February 24, 2022 00:58
-
-
Save JoshCheek/01e5a65c6f7ead3671736e5d35ea3ba1 to your computer and use it in GitHub Desktop.
Rebuild `closure_tree` ancestry in 1 query
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This rebuilds the ancestry for `closure_tree` gem with a single query (https://github.com/ClosureTree/closure_tree) | |
| # (rather than the ungodly number of queries it currently uses, which makes it extremely slow) | |
| # ESTABLISH DATABASE CONNECTION | |
| require 'pg' | |
| db = PG.connect dbname: 'nuvocargo_development' | |
| def db.exec(...) | |
| super(...).map { _1.transform_keys &:to_sym } | |
| rescue StandardError | |
| $!.set_backtrace caller.drop(1) | |
| raise | |
| end | |
| # DUPLICATE THE STRUCTURE SO WE CAN COMPARE OUR TABLE TO THEIRS | |
| db.exec <<~SQL | |
| create temp table josh_geolocation_hierarchies ( | |
| ancestor_id integer not null, | |
| descendant_id integer not null, | |
| generations integer not null | |
| ); | |
| create unique index josh_geolocation_anc_desc_idx on josh_geolocation_hierarchies (ancestor_id, descendant_id, generations); | |
| create index josh_geolocation_desc_idx on josh_geolocation_hierarchies (descendant_id); | |
| SQL | |
| # REBUILD THE HIERARCHIES WITH A SINGLE QUERY (this assumes the table is empty to start with) | |
| start = Time.now | |
| db.exec(<<~SQL).map { _1.values } | |
| insert into josh_geolocation_hierarchies (ancestor_id, descendant_id, generations) | |
| with recursive ancestors(ancestor_id, descendant_id, generations) AS ( | |
| -- base query to get the roots | |
| select | |
| id as ancestor_id, | |
| id as descendant_id, | |
| 0::integer as generations | |
| from geolocations | |
| union all | |
| -- recursive query to get the children | |
| select | |
| parent.ancestor_id, | |
| child.id as descendant_id, | |
| parent.generations+1 as generations | |
| from ancestors as parent | |
| inner join geolocations as child on child.parent_id = parent.descendant_id | |
| ) | |
| select * | |
| from ancestors; | |
| SQL | |
| our_duration = Time.now - start | |
| # SHOW THAT IT WORKS | |
| expected = db.exec 'select * from geolocation_hierarchies order by ancestor_id, descendant_id, generations' | |
| actual = db.exec 'select * from josh_geolocation_hierarchies order by ancestor_id, descendant_id, generations' | |
| expected == actual # => true | |
| # WHAT IS THE SPEED IMPROVEMENT? | |
| their_duration = 1064.125469 | |
| their_duration # => 1064.125469 | |
| our_duration # => 0.922901 | |
| improvement = their_duration / our_duration # => 1153.0223382572997 | |
| # CONCLUSION | |
| # | |
| # When I ran `Geolocation.rebuild!` in the rails console, it took 17.7 minutes (1064 seconds) | |
| # Here, it takes less than 1 second. | |
| # Ours is 1153 times faster!!!!!!!! | |
| # | |
| # Here's a few stats about the data I ran it against: | |
| # select count(1) from geolocations; # => 95171 | |
| # select count(1) from geolocation_hierarchies; # => 444620 | |
| # | |
| # Caveat: | |
| # the Rails console logs every query it runs, which is also expensive, | |
| # so their implementation is probably not that slow in practice... | |
| # buuuuut, then again, the Rails console was running against my local db on | |
| # my same computer, so the latency of running a query is also lower |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment