Skip to content

Instantly share code, notes, and snippets.

@keynslug
Last active August 24, 2023 16:15
Show Gist options
  • Save keynslug/79d29a4cb41cab40a1347efbc8d100c6 to your computer and use it in GitHub Desktop.
Save keynslug/79d29a4cb41cab40a1347efbc8d100c6 to your computer and use it in GitHub Desktop.
Perf comparison / release-52 (Orig) vs PR#11481 + emqx_trie_search_2 (Head)
> emqx_topicidx_bench:run(orig, head, fixed_15k).
orig: [{memory,435909},
{name,emqx_topic_index_r52},
{size,15306},
{type,ordered_set},
{keypos,1},
{protection,public}]
head: [{memory,422409},
{name,emqx_topic_index}, %% with `emqx_trie_search_2`
{size,15306},
{type,ordered_set},
{keypos,1},
{protection,public}]
x Base Match / Fixed Topics (15000)
+ Head Match / Fixed Topics (15000)
+--------------------------------------------------------------------------+
|++++++++ + xxxxxxxxx xxxx x|
| +++++++ xxxxxxx |
| +++++++ xxxxxxx |
| ++++++ xxxxxxx |
| ++++ xxxxxxx |
| ++++ xxxxxx |
| ++++ xxxxx |
| ++++ xxxxx |
| ++++ xxxxx |
| ++++ xxxxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xx |
| +++ xx |
| +++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| + x |
| + x |
| + |
| + |
| + |
| + |
| |____MA____| |
| |_A| |
+--------------------------------------------------------------------------+
Dataset: x N=100 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 945.000
1st Qu. 960.000
Median: 965.000
3rd Qu. 970.000
Max: 1189.00
Average: 968.600 [ -2.45800e-3] ( 965.520 ‥ 977.980)
Std. Dev: 24.8551 [ -2.23955] ( 10.2191 ‥ 50.7679)
Outliers: 0/5 = 5 (μ=968.598, σ=22.6156)
Outlier variance: 0.199536 (moderate)
------
Dataset: + N=100 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 842.000
1st Qu. 855.000
Median: 859.000
3rd Qu. 865.000
Max: 889.000
Average: 859.630 [ -8.21500e-3] ( 858.360 ‥ 861.060)
Std. Dev: 6.95390 [ -7.58051e-2] ( 5.98019 ‥ 9.10706)
Outliers: 0/1 = 1 (μ=859.622, σ=6.87809)
Outlier variance: 9.90000e-3 (not affected by outliers)
Difference at 95.0% confidence
-108.970 ± 5.05868
-11.2503% ± 0.522267%
(Student's t, pooled s = 18.2501)
------
> meter_memory_pressure:measure({emqx_topicidx_bench, sample, [orig, emqx_topicidx_bench:topics(), TabOrig]}, #{spawn_opts => []}).
#{heap_reclaimed => 4749512,major_gcs => 8,
major_gcs_duration => 38,minor_gcs => 4765,
minor_gcs_duration => 14568,offheap_bin_reclaimed => 32,
stack_max => 36,stack_min => 10}
> meter_memory_pressure:measure({emqx_topicidx_bench, sample, [head, emqx_topicidx_bench:topics(), TabHead]}, #{spawn_opts => []}).
#{heap_reclaimed => 4499900,major_gcs => 12,
major_gcs_duration => 62,minor_gcs => 4504,
minor_gcs_duration => 13453,offheap_bin_reclaimed => 14718,
stack_max => 26,stack_min => 8}
> emqx_topicidx_bench:run(orig, head, fixed).
orig: [{memory,534},
{name,emqx_topic_index_r52},
{size,15},
{type,ordered_set},
{keypos,1},
{protection,public}]
head: [{memory,504},
{name,emqx_topic_index},
{size,15},
{type,ordered_set},
{keypos,1},
{protection,public}]
x Base Match / Fixed Topics
+ Head Match / Fixed Topics
+--------------------------------------------------------------------------+
|++++++++ + xxxxxxx x x x x|
| +++++++ + xxxxxxx x x |
| +++++++ xxxxxx |
| +++++++ xxxxxx |
| +++++++ xxxxxx |
| +++++++ xxxxxx |
| ++++++ xxxxxx |
| ++++++ xxxxxx |
| ++++++ xxxxx |
| +++++ xxxxx |
| +++++ xxxxx |
| +++++ xxxxx |
| +++++ xxxxx |
| ++++ xxxxx |
| ++++ xxxxx |
| ++++ xxxxx |
| ++++ xxxxx |
| ++++ xxxxx |
| ++++ xxxxx |
| ++++ xxxxx |
| ++++ xxxx |
| ++++ xxxx |
| ++++ xxxx |
| ++++ xxxx |
| ++++ xxx |
| ++++ xxx |
| ++++ xxx |
| ++++ xxx |
| ++++ xxx |
| ++++ xxx |
| ++++ xx |
| +++ xx |
| +++ xx |
| +++ xx |
| +++ xx |
| +++ xx |
| +++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| + xx |
| + xx |
| + xx |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| |__MA__| |
| |_A| |
+--------------------------------------------------------------------------+
Dataset: x N=200 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 472.000
1st Qu. 479.000
Median: 482.000
3rd Qu. 486.000
Max: 684.000
Average: 484.880 [ -1.19175e-2] ( 483.280 ‥ 488.880)
Std. Dev: 17.0705 [ -1.18580] ( 8.11722 ‥ 35.0081)
Outliers: 0/10 = 10 (μ=484.868, σ=15.8847)
Outlier variance: 0.479555 (moderate)
------
Dataset: + N=200 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 364.000
1st Qu. 375.000
Median: 380.000
3rd Qu. 384.000
Max: 410.000
Average: 380.300 [ 5.51700e-3] ( 379.420 ‥ 381.280)
Std. Dev: 6.69283 [ -3.43768e-2] ( 5.98309 ‥ 7.91268)
Outliers: 0/2 = 2 (μ=380.306, σ=6.65845)
Outlier variance: 0.185408 (moderate)
Difference at 95.0% confidence
-104.580 ± 2.54119
-21.5682% ± 0.524087%
(Student's t, pooled s = 12.9653)
------
> emqx_topicidx_bench:run(orig, head, fixed_rules).
orig: [{memory,292},
{name,emqx_topic_index_r52},
{size,5},
{type,ordered_set},
{keypos,1},
{protection,public}]
head: [{memory,282},
{name,emqx_topic_index},
{size,5},
{type,ordered_set},
{keypos,1},
{protection,public}]
x Base Match / Fixed Rules
+ Head Match / Fixed Rules
+--------------------------------------------------------------------------+
|++++++++++ + + xxxxxxxxxxxx x x x|
|+++++++++ xxxxxxxxxxx |
|+++++++++ xxxxxxxx x |
|+++++++++ xxxxxxxx x |
|++++++++ xxxxxx x x |
| +++++++ xxxxxx x |
| +++++++ xxxxx x |
| ++++++ xxxxx x |
| ++++++ xxxxx |
| ++++++ xxxxx |
| ++++++ xxxxx |
| ++++++ xxxxx |
| ++++++ xxxxx |
| ++++ + xxxxx |
| ++++ + xxxxx |
| ++++ + xxx x |
| ++++ + xxx x |
| ++++ xxx x |
| ++++ xxx x |
| ++++ xxx |
| ++++ xxx |
| ++++ xxx |
| ++++ xxx |
| ++++ xxx |
| ++++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| + + xxx |
| + + xxx |
| + + xxx |
| + + xxx |
| + + xxx |
| + + xx |
| + + xx |
| + + xx |
| + + xx |
| |__A___| |
| |_A__| |
+--------------------------------------------------------------------------+
Dataset: x N=200 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 341.000
1st Qu. 347.000
Median: 349.000
3rd Qu. 352.000
Max: 430.000
Average: 350.610 [ 5.16300e-3] ( 349.745 ‥ 352.150)
Std. Dev: 8.11401 [ -0.244627] ( 5.48932 ‥ 14.7470)
Outliers: 0/16 = 16 (μ=350.615, σ=7.86938)
Outlier variance: 0.282306 (moderate)
------
Dataset: + N=200 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 247.000
1st Qu. 252.000
Median: 255.000
3rd Qu. 258.000
Max: 292.000
Average: 255.650 [ 2.53900e-3] ( 254.970 ‥ 256.455)
Std. Dev: 5.34982 [ -5.04898e-2] ( 4.48862 ‥ 7.47539)
Outliers: 0/3 = 3 (μ=255.653, σ=5.29933)
Outlier variance: 0.243399 (moderate)
Difference at 95.0% confidence
-94.9600 ± 1.34698
-27.0842% ± 0.384181%
(Student's t, pooled s = 6.87232)
------
> emqx_topicidx_bench:run(orig, head, fixed_5k_rules).
orig: [{memory,145417},
{name,emqx_topic_index_r52},
{size,5102},
{type,ordered_set},
{keypos,1},
{protection,public}]
head: [{memory,140917},
{name,emqx_topic_index},
{size,5102},
{type,ordered_set},
{keypos,1},
{protection,public}]
x Base Match / Fixed Rules (5000)
+ Head Match / Fixed Rules (5000)
+--------------------------------------------------------------------------+
|+++++++ + xxxxxxxxx x x x|
|++++++ xxxxxxx x x |
|++++++ xxxxxxx |
|++++++ xxxxxx |
|++++++ xxxxx |
|++++++ xxxxx |
|++++ xxxxx |
|++++ xxxxx |
|++++ xxxxx |
|++++ xxxxx |
|++++ xxxxx |
|++++ xxxxx |
| +++ xxxx |
| +++ xxx |
| +++ xxx |
| +++ xxx |
| +++ xx |
| +++ xx |
| ++ xx |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| + |
| + |
| + |
| + |
| |___MA____| |
| |A_| |
+--------------------------------------------------------------------------+
Dataset: x N=100 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 757.000
1st Qu. 762.000
Median: 766.000
3rd Qu. 773.000
Max: 944.000
Average: 770.320 [ -2.43220e-2] ( 767.680 ‥ 777.350)
Std. Dev: 20.6642 [ -1.67223] ( 9.42241 ‥ 43.9936)
Outliers: 0/6 = 6 (μ=770.296, σ=18.9919)
Outlier variance: 0.209491 (moderate)
------
Dataset: + N=100 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 649.000
1st Qu. 654.000
Median: 657.000
3rd Qu. 661.000
Max: 693.000
Average: 658.070 [ 3.20800e-3] ( 656.990 ‥ 659.540)
Std. Dev: 6.37745 [ -0.102468] ( 5.08890 ‥ 9.54159)
Outliers: 0/2 = 2 (μ=658.073, σ=6.27498)
Outlier variance: 9.90000e-3 (not affected by outliers)
Difference at 95.0% confidence
-112.250 ± 4.23868
-14.5719% ± 0.550249%
(Student's t, pooled s = 15.2918)
------
> emqx_topicidx_bench:run(orig, head, topics_only).
orig: [{memory,14364},
{name,emqx_topic_index_r52},
{size,441},
{type,ordered_set},
{keypos,1},
{protection,public}]
head: [{memory,8040},
{name,emqx_topic_index},
{size,441},
{type,ordered_set},
{keypos,1},
{protection,public}]
x Base Match / Topics = Filters
+ Head Match / Topics = Filters
+--------------------------------------------------------------------------+
|+++++++ xxxxxxxx x|
|+++++ xxxxxx |
|+++++ xxxx |
|++++ xxxx |
| +++ xxxx |
| +++ xxx |
| +++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + xx |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| |_A_| |
| A| |
+--------------------------------------------------------------------------+
Dataset: x N=200 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 639.000
1st Qu. 647.000
Median: 650.000
3rd Qu. 656.000
Max: 840.000
Average: 652.950 [ -1.91550e-3] ( 651.575 ‥ 656.845)
Std. Dev: 15.2825 [ -1.17011] ( 7.16635 ‥ 33.1022)
Outliers: 0/6 = 6 (μ=652.948, σ=14.1124)
Outlier variance: 0.287165 (moderate)
------
Dataset: + N=200 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 291.000
1st Qu. 298.000
Median: 301.000
3rd Qu. 303.000
Max: 338.000
Average: 301.550 [ 6.55200e-3] ( 300.850 ‥ 302.430)
Std. Dev: 5.64596 [ -5.67173e-2] ( 4.55236 ‥ 7.59386)
Outliers: 0/10 = 10 (μ=301.557, σ=5.58924)
Outlier variance: 0.204698 (moderate)
Difference at 95.0% confidence
-351.400 ± 2.25797
-53.8173% ± 0.345810%
(Student's t, pooled s = 11.5202)
------
> emqx_topicidx_bench:run(orig, head, topics_rules).
orig: [{memory,4902},
{name,emqx_topic_index_r52},
{size,147},
{type,ordered_set},
{keypos,1},
{protection,public}]
head: [{memory,2794},
{name,emqx_topic_index},
{size,147},
{type,ordered_set},
{keypos,1},
{protection,public}]
x Base Match / Topics = Rules
+ Head Match / Topics = Rules
+--------------------------------------------------------------------------+
|++++ + xxxxxxx x x|
|++++ xxxxx |
|++++ xxxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|+++ xxxx |
|+++ xxxx |
|+++ xxxx |
|+++ xxxx |
| ++ xxxx |
| ++ xxxx |
| ++ xxxx |
| ++ xxxx |
| ++ xxxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| ++ x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + x |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| + |
| |_A__| |
| A| |
+--------------------------------------------------------------------------+
Dataset: x N=200 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 471.000
1st Qu. 476.000
Median: 478.000
3rd Qu. 482.000
Max: 659.000
Average: 480.600 [ 6.15700e-3] ( 479.305 ‥ 484.520)
Std. Dev: 14.5070 [ -1.11031] ( 6.10103 ‥ 33.9857)
Outliers: 0/8 = 8 (μ=480.606, σ=13.3967)
Outlier variance: 0.400233 (moderate)
------
Dataset: + N=200 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 218.000
1st Qu. 223.000
Median: 226.000
3rd Qu. 228.000
Max: 257.000
Average: 226.325 [ 4.75500e-4] ( 225.805 ‥ 227.005)
Std. Dev: 4.26501 [ -4.34957e-2] ( 3.52085 ‥ 6.23960)
Outliers: 0/4 = 4 (μ=226.325, σ=4.22152)
Outlier variance: 0.204831 (moderate)
Difference at 95.0% confidence
-254.275 ± 2.09565
-52.9078% ± 0.436050%
(Student's t, pooled s = 10.6921)
------
> emqx_topicidx_bench:run(orig, head, topics_with_filters).
orig: [{memory,14643},
{name,emqx_topic_index_r52},
{size,453},
{type,ordered_set},
{keypos,1},
{protection,public}]
head: [{memory,8319},
{name,emqx_topic_index},
{size,453},
{type,ordered_set},
{keypos,1},
{protection,public}]
x Base Match / Topics = Filters
+ Head Match / Topics = Filters
+--------------------------------------------------------------------------+
|++++++ + + xxxxxxx x x|
|+++++ xxxxxxx |
|++++ xxxxxx |
|++++ xxxxx |
|++++ xxxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxxx |
|++++ xxx |
|++++ xxx |
|+++ xxx |
|+++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xxx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ xx |
| ++ x |
| ++ x |
| ++ x |
| ++ |
| + |
| + |
| + |
| |_A_| |
||_A| |
+--------------------------------------------------------------------------+
Dataset: x N=200 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 952.000
1st Qu. 964.000
Median: 969.000
3rd Qu. 973.000
Max: 1163.00
Average: 970.755 [ -6.85150e-3] ( 969.075 ‥ 974.425)
Std. Dev: 17.1205 [ -1.00484] ( 9.65094 ‥ 34.9268)
Outliers: 0/12 = 12 (μ=970.748, σ=16.1156)
Outlier variance: 0.185449 (moderate)
------
Dataset: + N=200 CI=95.0000
Statistic Value [ Bias] (Bootstrapped LB‥UB)
Min: 533.000
1st Qu. 540.000
Median: 546.000
3rd Qu. 551.000
Max: 627.000
Average: 546.755 [ 1.69750e-3] ( 545.570 ‥ 548.420)
Std. Dev: 9.92914 [ -0.163475] ( 7.60798 ‥ 15.9447)
Outliers: 0/3 = 3 (μ=546.757, σ=9.76567)
Outlier variance: 0.195045 (moderate)
Difference at 95.0% confidence
-424.000 ± 2.74294
-43.6773% ± 0.282558%
(Student's t, pooled s = 13.9946)
------
%%--------------------------------------------------------------------
%% Copyright (c) 2023 EMQ Technologies Co., Ltd. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%--------------------------------------------------------------------
%% @doc Topic index for matching topics to topic filters.
%%
%% Works on top of ETS ordered_set table. Keys are tuples constructed from
%% parsed topic filters and record IDs, wrapped in a tuple to order them
%% strictly greater than unit tuple (`{}`). Existing table may be used if
%% existing keys will not collide with index keys.
%%
%% Designed to effectively answer questions like:
%% 1. Does any topic filter match given topic?
%% 2. Which records are associated with topic filters matching given topic?
%% 3. Which topic filters match given topic?
%% 4. Which record IDs are associated with topic filters matching given topic?
%%
%% Trie-search algorithm:
%%
%% Given a 3-level topic (e.g. a/b/c), if we leave out '#' for now,
%% all possible subscriptions of a/b/c can be enumerated as below:
%%
%% a/b/c
%% a/b/+
%% a/+/c <--- subscribed
%% a/+/+
%% +/b/c <--- subscribed
%% +/b/+
%% +/+/c
%% +/+/+ <--- start searching upward from here
%%
%% Let's name this search space "Space1".
%% If we brute-force it, the scope would be 8 (2^3).
%% Meaning this has O(2^N) complexity (N being the level of topics).
%%
%% This clearly isn't going to work.
%% Should we then try to enumerate all subscribers instead?
%% If there are also other subscriptions, e.g. "+/x/y" and "+/b/0"
%%
%% a/+/c <--- match of a/b/c
%% +/x/n
%% ...
%% +/x/2
%% +/x/1
%% +/b/c <--- match of a/b/c
%% +/b/1
%% +/b/0
%%
%% Let's name it "Space2".
%%
%% This has O(M * L) complexity (M being the total number of subscriptions,
%% and L being the number of topic levels).
%% This is usually a lot smaller than "Space1", but still not very effective
%% if the collection size is e.g. 1 million.
%%
%% To make it more effective, we'll need to combine the two algorithms:
%% Use the ordered subscription topics' prefixes as starting points to make
%% guesses about whether or not the next word can be a '+', and skip-over
%% to the next possible match.
%%
%% NOTE: A prerequisite of the ordered collection is, it should be able
%% to find the *immediate-next* topic/filter with a given prefix.
%%
%% In the above example, we start from "+/b/0". When comparing "+/b/0"
%% with "a/b/c", we know the matching prefix is "+/b", meaning we can
%% start guessing if the next word is '+' or 'c':
%% * It can't be '+' because '+' < '0'
%% * It might be 'c' because 'c' > '0'
%%
%% So, we try to jump to the next topic which has a prefix of "+/b/c"
%% (this effectively means skipping over "+/b/1").
%%
%% After "+/b/c" is found to be a matching filter, we move up:
%% * The next possible match is "a/+/+" according to Space1
%% * The next subscription is "+/x/1" according to Space2
%%
%% "a/+/+" is lexicographically greater than "+/x/+", so let's jump to
%% the immediate-next of 'a/+/+', which is "a/+/c", allowing us to skip
%% over all the ones starting with "+/x".
%%
%% If we take '#' into consideration, it's only one extra comparison to see
%% if a filter ends with '#'.
%%
%% In summary, the complexity of this algorithm is O(N * L)
%% N being the number of total matches, and L being the level of the topic.
-module(emqx_trie_search_2).
-export([make_key/2]).
-export([match/2, matches/3, get_id/1, get_topic/1]).
-export_type([key/1, word/0, nextf/0, opts/0]).
-type word() :: binary() | '+' | '#'.
-type base_key() :: {binary() | [word()], {}}.
-type key(ID) :: {binary() | [word()], {ID}}.
-type nextf() :: fun((key(_) | base_key()) -> '$end_of_table' | key(_)).
-type opts() :: [unique | return_first].
%% @doc Make a search-key for the given topic.
-spec make_key(emqx_types:topic(), ID) -> key(ID).
make_key(Topic, ID) when is_binary(Topic) ->
Words = words(Topic),
case lists:any(fun erlang:is_atom/1, Words) of
true ->
%% it's a wildcard
{Words, {ID}};
false ->
%% Not a wildcard. We do not split the topic
%% because they can be found with direct lookups.
%% it is also more compact in memory.
{Topic, {ID}}
end.
%% @doc Extract record ID from the match.
-spec get_id(key(ID)) -> ID.
get_id({_Filter, {ID}}) ->
ID.
%% @doc Extract topic (or topic filter) from the match.
-spec get_topic(key(_ID)) -> emqx_types:topic().
get_topic({Filter, _ID}) when is_list(Filter) ->
emqx_topic:join(Filter);
get_topic({Topic, _ID}) ->
Topic.
-compile({inline, [base/1, move_up/2, match_add/2, compare/3]}).
%% Make the base-key which can be used to locate the desired search target.
base(Prefix) ->
{Prefix, {}}.
%% Move the search target to the key next to the given Base.
move_up(NextF, Base) ->
NextF(Base).
%% @doc Match given topic against the index and return the first match, or `false` if
%% no match is found.
-spec match(emqx_types:topic(), nextf()) -> false | key(_).
match(Topic, NextF) ->
try search(Topic, NextF, [return_first]) of
_ -> false
catch
throw:{first, Res} ->
Res
end.
%% @doc Match given topic against the index and return _all_ matches.
%% If `unique` option is given, return only unique matches by record ID.
-spec matches(emqx_types:topic(), nextf(), opts()) -> [key(_)].
matches(Topic, NextF, Opts) ->
search(Topic, NextF, Opts).
%% @doc Entrypoint of the search for a given topic.
search(Topic, NextF, Opts) ->
{Words, Prefix} = match_init(Topic),
ORetFirst = proplists:get_bool(return_first, Opts),
OUnique = proplists:get_bool(unique, Opts),
Acc0 =
case ORetFirst of
true ->
first;
false when OUnique ->
#{};
false ->
[]
end,
Matches =
case search_new(Words, base(Prefix), NextF, Acc0) of
{Cursor, Acc} ->
match_non_wildcards(Topic, Cursor, NextF, Acc);
Acc ->
Acc
end,
case is_map(Matches) of
true ->
maps:values(Matches);
false ->
Matches
end.
%% The recursive entrypoint of the trie-search algorithm.
%% Always start from the initial prefix and words.
search_new(Words0, NewBase, NextF, Acc) ->
case move_up(NextF, NewBase) of
'$end_of_table' ->
Acc;
Cursor ->
search_up(Words0, Cursor, NextF, Acc)
end.
%% Search to the bigger end of ordered collection of topics and topic-filters.
search_up(Words, {Filter, _} = Cursor, NextF, Acc) ->
case compare(Filter, Words, false) of
match_full ->
search_new(Words, Cursor, NextF, match_add(Cursor, Acc));
match_prefix ->
search_new(Words, Cursor, NextF, Acc);
lower ->
{Cursor, Acc};
[SeekWord | FilterTail] ->
% NOTE
% This is a backtrack.
% If we visualize the `Filter` as `FilterHead ++ [_] ++ FilterTail`, we need to move up to:
% `FilterHead ++ [SeekWord]`. Here, we need `FilterTail` to find when to stop recursion.
NewBase = base(seek(SeekWord, Filter, FilterTail)),
search_new(Words, NewBase, NextF, Acc)
end.
seek(SeekWord, [_ | FilterTail], FilterTail) ->
[SeekWord];
seek(SeekWord, [FilterWord | Rest], FilterTail) ->
[FilterWord | seek(SeekWord, Rest, FilterTail)].
compare(NotFilter, _, _) when is_binary(NotFilter) ->
lower;
compare([], [], _) ->
match_full;
compare([], _Words, _) ->
match_prefix;
compare(['#'], _Words, _) ->
match_full;
compare(['+' | TF], [HW | TW], _Backtrack) ->
% NOTE
% In case of cursor is lower than the topic words, we will need to backtrack to the most
% recent '+', move from base consisting of the filter up to the most recent '+' but not
% including it, and the respective topic word.
compare(TF, TW, [HW | TF]);
compare([HW | TF], [HW | TW], Backtrack) ->
compare(TF, TW, Backtrack);
compare([HF | _], [HW | _], false) when HF > HW ->
% NOTE
% There's nowhere to backtrack to, we're out of the search space.
lower;
compare([HF | _], [HW | _], Backtrack) when HF > HW ->
% NOTE
% There was a plus, return the backtrack for the next move.
Backtrack;
compare([_ | _], [], false) ->
lower;
compare([_ | _], [], Backtrack) ->
Backtrack;
compare([_HF | TF], [HW | _], _) ->
% NOTE
% Topic is higher than the filter, we're need to skip over to the next possible filter.
[HW | TF].
match_add(K = {_Filter, ID}, Acc = #{}) ->
% NOTE: ensuring uniqueness by record ID
Acc#{ID => K};
match_add(K, Acc) when is_list(Acc) ->
[K | Acc];
match_add(K, first) ->
throw({first, K}).
match_init(Topic) ->
case words(Topic) of
Words = [W = <<"$", _/bytes>> | _] ->
% NOTE
% This will effectively skip attempts to match special topics to `#` or `+/...`.
{Words, [W]};
Words ->
{Words, []}
end.
-spec words(emqx_types:topic()) -> [word()].
words(Topic) when is_binary(Topic) ->
% NOTE
% This is almost identical to `emqx_topic:words/1`, but it doesn't convert empty
% tokens to ''. This is needed to keep ordering of words consistent with what
% `match_filter/3` expects.
[word(W) || W <- emqx_topic:tokens(Topic)].
-spec word(binary()) -> word().
word(<<"+">>) -> '+';
word(<<"#">>) -> '#';
word(Bin) -> Bin.
match_non_wildcards(Topic, {Topic, _} = Cursor, NextF, Acc) ->
match_non_wildcards(Cursor, NextF, match_add(Cursor, Acc));
match_non_wildcards(Topic, _Cursor, NextF, Acc) ->
match_non_wildcards(base(Topic), NextF, Acc).
match_non_wildcards({Topic, _} = Base, NextF, Acc) ->
case NextF(Base) of
{Topic, _ID} = Key ->
match_non_wildcards(Key, NextF, match_add(Key, Acc));
_Other ->
Acc
end.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment