-
-
Save pichi/4d01443dc65e3fd6cc3784ec6998c16b to your computer and use it in GitHub Desktop.
Find top n items in a unordered list
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-module(test). | |
-compile(export_all). | |
%% API | |
-compile({inline, [ insert/2 | |
, merge/2 | |
]}). | |
insert(E, []) -> [E]; | |
insert(E, [E2|_] = H) when E =< E2 -> [E, H]; | |
insert(E, [E2|H]) -> [E2, [E]|H]. | |
merge(H1, []) -> H1; | |
merge([E1|H1], [E2|_]=H2) when E1 =< E2 -> [E1, H2|H1]; | |
merge(H1, [E2|H2]) -> [E2, H1|H2]. | |
merge_pairs([]) -> []; | |
merge_pairs([H]) -> H; | |
merge_pairs([A, B|T]) -> merge(merge(A, B), merge_pairs(T)). | |
run_pheap(List, Num) -> | |
run_pheap(List, Num, []). | |
run_pheap(List, 0, Heap) -> | |
pheap_full(List, Heap); | |
run_pheap([], 0, Heap) -> | |
finish(Heap, []); | |
run_pheap([{Y, X, _} = H|T], N, Heap) -> | |
run_pheap(T, N-1, insert({{X, Y}, H}, Heap)). | |
pheap_full([], Heap) -> | |
finish(Heap, []); | |
pheap_full([{Y, X, _} = H|T], [{K, _}|HeapT] = Heap) -> | |
case {X, Y} of | |
N when N > K -> | |
pheap_full(T, insert({N, H}, merge_pairs(HeapT))); | |
_ -> | |
pheap_full(T, Heap) | |
end. | |
finish([], Acc) -> Acc; | |
finish([{_, H}|T], Acc) -> | |
finish(merge_pairs(T), [H|Acc]). | |
run_all_sorted_sublist(List, Num) -> | |
L = [ {{X, Y}, Z} || {Y, X, _} = Z <- List], | |
[ X || {_, X} <- lists:sublist(lists:reverse(lists:sort(L)), Num) ]. | |
run_gb_trees(List, Num) -> | |
run_gb_trees(List, Num, gb_trees:empty()). | |
run_gb_trees([], _Num, Acc) -> | |
lists:foldl(fun({Key1, Key2, Key3}, Sum) -> [{Key2, Key1, Key3}|Sum] end, [], gb_trees:keys(Acc)); | |
run_gb_trees([{Key1, Key2, Key3}|List], Num, Acc) -> | |
NewKey = {Key2, Key1, Key3}, | |
NewAcc = | |
case gb_trees:size(Acc) < Num of | |
true -> | |
gb_trees:insert(NewKey, 0, Acc); | |
false -> | |
{Smallest,_} = gb_trees:smallest(Acc), | |
case Smallest < NewKey of | |
true -> | |
Acc2 = gb_trees:delete(Smallest, Acc), | |
gb_trees:insert(NewKey, 0, Acc2); | |
false -> | |
Acc | |
end | |
end, | |
run_gb_trees(List, Num, NewAcc). | |
run_small_to_big_order_list(List, Num) -> | |
run_small_to_big_order_list(List, Num, 0, []). | |
run_small_to_big_order_list([], _Num, _, Acc) -> | |
lists:foldl(fun({Key1, Key2, Key3}, Sum) -> [{Key2, Key1, Key3}|Sum] end, [], Acc); | |
run_small_to_big_order_list([{Key1, Key2, Key3}|List], Num, Len, Acc) -> | |
NewKey = {Key2, Key1, Key3}, | |
{NewLen, NewAcc} = | |
case Len < Num of | |
true -> | |
{Len + 1, lists:sort([NewKey|Acc])}; | |
false -> | |
[Smallest|Acc2] = Acc, | |
case Smallest < NewKey of | |
true -> | |
{Len, lists:sort([NewKey|Acc2])}; | |
false -> | |
{Len, Acc} | |
end | |
end, | |
run_small_to_big_order_list(List, Num, NewLen, NewAcc). | |
run_small_to_big_order_list_v2(List, Num) -> | |
run_small_to_big_order_list_v2(List, Num, fun({X2, X1, _}, {Y2, Y1, _}) | |
-> {X1, X2} =< {Y1, Y2} end, 0, []). | |
run_small_to_big_order_list_v2([], _, _, _, Acc) -> | |
lists:reverse(Acc); | |
run_small_to_big_order_list_v2([NewKey|List], Num, Fun, Len, Acc) -> | |
{NewLen, NewAcc} = | |
if Len < Num -> {Len + 1, insert(NewKey, Acc, Fun)}; | |
true -> | |
[Smallest|Acc2] = Acc, | |
case Fun(Smallest,NewKey) of | |
true -> {Len, insert(NewKey, Acc2, Fun)}; | |
false -> {Len, Acc} | |
end | |
end, | |
run_small_to_big_order_list_v2(List, Num, Fun, NewLen, NewAcc). | |
insert(K, [], _) -> [K]; | |
insert(K, [H|T], Fun) -> | |
case Fun(K, H) of | |
true -> [K, H|T]; | |
false -> [H|insert(K, T, Fun)] | |
end. | |
run_big_to_small_order_list(List, Num) -> | |
run_big_to_small_order_list(List, Num, 0, []). | |
run_big_to_small_order_list([], _Num, _, Acc) -> | |
Acc; | |
run_big_to_small_order_list([Key|List], Num, Len, Acc) -> | |
{NewLen, NewAcc} = | |
case Len < Num of | |
true -> | |
{Len + 1, lists:sort(fun({X2, X1, _}, {Y2, Y1, _}) -> {X1, X2} > {Y1, Y2} end, [Key|Acc])}; | |
false -> | |
Smallest = lists:last(Acc), | |
case Smallest < Key of | |
true -> | |
Acc2 = lists:sublist(Acc, Len - 1), | |
{Len, lists:sort(fun({X2, X1, _}, {Y2, Y1, _}) -> {X1, X2} > {Y1, Y2} end, [Key|Acc2])}; | |
false -> | |
{Len, Acc} | |
end | |
end, | |
run_big_to_small_order_list(List, Num, NewLen, NewAcc). | |
test() -> | |
[ test(TotalNum, TopNum) | |
|| TotalNum <- [5000, 10000, 15000, 20000, 25000], | |
TopNum <- [20, 30, 40, 50, 60, 70]]. | |
%% Test | |
test(TotalNum, TopNum) -> | |
List = prepared_random_list(TotalNum), | |
%io:format("~p~n", [List]), | |
Self = self(), | |
L = lists:keysort(2, | |
[ receive {Pid, X} -> X end | |
|| {F, Type} <- | |
[{run_all_sorted_sublist, all_sorted_sublist________}, | |
{run_gb_trees, gb_tree___________________}, | |
{run_small_to_big_order_list, small_to_big_order_list___}, | |
{run_big_to_small_order_list, big_to_small_order_list___}, | |
{run_small_to_big_order_list_v2, small_to_big_order_list_v2}, | |
{run_pheap, pheap_____________________}], | |
Pid <- [spawn_link(fun() -> | |
{Time, Result} = timer:tc(?MODULE, F, [List, TopNum]), | |
Self ! {self(), {Type, Time, Result}} | |
end)] | |
]), | |
[{_, Time1, _}|_] = L, | |
[_] = lists:ukeysort(3, L), | |
{{TotalNum, TopNum}, [{Type, Time, Time/Time1*100} | |
|| {Type, Time, _} <- L]}. | |
prepared_random_list(TotalNum) -> | |
[ {X, X+1, X+2} | |
|| {_, X} <- lists:sort( | |
[ {rand:uniform(), X} | |
|| X <- lists:seq(1, TotalNum) ]) | |
]. |
When {X, Y}
from {Y, X, _}
used as comparing key used pairing heap is still fastest.
> rp(test:test()).
[{{5000,20},
[{pheap_____________________,208,100.0},
{small_to_big_order_list_v2,416,200.0},
{small_to_big_order_list___,421,202.40384615384616},
{gb_tree___________________,490,235.5769230769231},
{big_to_small_order_list___,1266,608.6538461538462},
{all_sorted_sublist________,2544,1223.076923076923}]},
{{5000,30},
[{pheap_____________________,241,100.0},
{small_to_big_order_list_v2,534,221.5767634854772},
{gb_tree___________________,577,239.41908713692945},
{small_to_big_order_list___,889,368.87966804979254},
{big_to_small_order_list___,2396,994.1908713692947},
{all_sorted_sublist________,2553,1059.3360995850621}]},
{{5000,40},
[{pheap_____________________,264,100.0},
{small_to_big_order_list_v2,657,248.86363636363637},
{gb_tree___________________,697,264.0151515151515},
{small_to_big_order_list___,1769,670.0757575757576},
{all_sorted_sublist________,2588,980.3030303030303},
{big_to_small_order_list___,3825,1448.8636363636363}]},
{{5000,50},
[{pheap_____________________,301,100.0},
{gb_tree___________________,668,221.9269102990033},
{small_to_big_order_list_v2,839,278.7375415282392},
{small_to_big_order_list___,1677,557.1428571428571},
{all_sorted_sublist________,3181,1056.810631229236},
{big_to_small_order_list___,5570,1850.4983388704318}]},
{{5000,60},
[{pheap_____________________,319,100.0},
{gb_tree___________________,708,221.9435736677116},
{small_to_big_order_list_v2,929,291.22257053291537},
{small_to_big_order_list___,2255,706.8965517241379},
{all_sorted_sublist________,2429,761.4420062695925},
{big_to_small_order_list___,6623,2076.175548589342}]},
{{5000,70},
[{pheap_____________________,348,100.0},
{gb_tree___________________,773,222.1264367816092},
{small_to_big_order_list_v2,1113,319.8275862068965},
{all_sorted_sublist________,2421,695.6896551724138},
{small_to_big_order_list___,2798,804.0229885057471},
{big_to_small_order_list___,9017,2591.0919540229884}]},
{{10000,20},
[{pheap_____________________,537,100.0},
{small_to_big_order_list_v2,893,166.29422718808195},
{gb_tree___________________,1061,197.57914338919926},
{small_to_big_order_list___,1213,225.88454376163872},
{big_to_small_order_list___,2191,408.00744878957175},
{all_sorted_sublist________,6821,1270.2048417132216}]},
{{10000,30},
[{pheap_____________________,467,100.0},
{small_to_big_order_list_v2,1141,244.32548179871517},
{gb_tree___________________,1151,246.46680942184153},
{small_to_big_order_list___,1238,265.0963597430407},
{big_to_small_order_list___,3297,705.9957173447538},
{all_sorted_sublist________,6513,1394.646680942184}]},
{{10000,40},
[{pheap_____________________,512,100.0},
{small_to_big_order_list_v2,1144,223.4375},
{gb_tree___________________,1254,244.921875},
{small_to_big_order_list___,1673,326.7578125},
{big_to_small_order_list___,4833,943.9453125},
{all_sorted_sublist________,7945,1551.7578125}]},
{{10000,50},
[{pheap_____________________,559,100.0},
{gb_tree___________________,1276,228.26475849731662},
{small_to_big_order_list_v2,1327,237.3881932021467},
{small_to_big_order_list___,2078,371.7352415026834},
{all_sorted_sublist________,6550,1171.7352415026835},
{big_to_small_order_list___,7019,1255.6350626118067}]},
{{10000,60},
[{pheap_____________________,633,100.0},
{gb_tree___________________,1373,216.9036334913112},
{small_to_big_order_list_v2,1599,252.60663507109004},
{small_to_big_order_list___,2942,464.77093206951025},
{all_sorted_sublist________,6798,1073.9336492890995},
{big_to_small_order_list___,8977,1418.1674565560822}]},
{{10000,70},
[{pheap_____________________,655,100.0},
{gb_tree___________________,1404,214.3511450381679},
{small_to_big_order_list_v2,1791,273.4351145038168},
{small_to_big_order_list___,3504,534.9618320610687},
{all_sorted_sublist________,7089,1082.290076335878},
{big_to_small_order_list___,11657,1779.6946564885495}]},
{{15000,20},
[{pheap_____________________,625,100.0},
{small_to_big_order_list___,1032,165.12},
{small_to_big_order_list_v2,1413,226.08},
{gb_tree___________________,1542,246.72},
{big_to_small_order_list___,3128,500.48},
{all_sorted_sublist________,10457,1673.1200000000001}]},
{{15000,30},
[{pheap_____________________,638,100.0},
{small_to_big_order_list_v2,1412,221.31661442006268},
{small_to_big_order_list___,1537,240.9090909090909},
{gb_tree___________________,1611,252.50783699059562},
{big_to_small_order_list___,4411,691.3793103448276},
{all_sorted_sublist________,10565,1655.9561128526645}]},
{{15000,40},
[{pheap_____________________,692,100.0},
{small_to_big_order_list_v2,1586,229.1907514450867},
{gb_tree___________________,1616,233.52601156069363},
{small_to_big_order_list___,1977,285.6936416184971},
{big_to_small_order_list___,6200,895.9537572254336},
{all_sorted_sublist________,10481,1514.5953757225434}]},
{{15000,50},
[{pheap_____________________,714,100.0},
{small_to_big_order_list_v2,1808,253.22128851540614},
{gb_tree___________________,1828,256.0224089635854},
{small_to_big_order_list___,2724,381.51260504201684},
{big_to_small_order_list___,8821,1235.4341736694678},
{all_sorted_sublist________,10629,1488.655462184874}]},
{{15000,60},
[{pheap_____________________,768,100.0},
{gb_tree___________________,1822,237.23958333333334},
{small_to_big_order_list_v2,2080,270.83333333333337},
{small_to_big_order_list___,3452,449.4791666666667},
{all_sorted_sublist________,10152,1321.875},
{big_to_small_order_list___,11040,1437.5}]},
{{15000,70},
[{pheap_____________________,758,100.0},
{gb_tree___________________,1870,246.7018469656992},
{small_to_big_order_list_v2,2185,288.2585751978892},
{small_to_big_order_list___,3698,487.8627968337731},
{all_sorted_sublist________,10733,1415.9630606860158},
{big_to_small_order_list___,12817,1690.8970976253297}]},
{{20000,20},
[{pheap_____________________,1542,100.0},
{small_to_big_order_list___,1652,107.13359273670558},
{small_to_big_order_list_v2,2228,144.48767833981842},
{gb_tree___________________,2297,148.96238651102465},
{big_to_small_order_list___,4023,260.89494163424126},
{all_sorted_sublist________,13749,891.6342412451362}]},
{{20000,30},
[{pheap_____________________,1577,100.0},
{small_to_big_order_list___,2006,127.20355104629041},
{small_to_big_order_list_v2,2299,145.78313253012047},
{gb_tree___________________,2340,148.38300570703868},
{big_to_small_order_list___,5309,336.65187064045654},
{all_sorted_sublist________,12764,809.3849080532657}]},
{{20000,40},
[{pheap_____________________,1575,100.0},
{small_to_big_order_list_v2,2403,152.57142857142858},
{gb_tree___________________,2463,156.38095238095238},
{small_to_big_order_list___,2564,162.7936507936508},
{big_to_small_order_list___,7816,496.2539682539682},
{all_sorted_sublist________,13601,863.5555555555555}]},
{{20000,50},
[{pheap_____________________,1627,100.0},
{gb_tree___________________,2584,158.819913952059},
{small_to_big_order_list_v2,2593,159.37307928703135},
{small_to_big_order_list___,3209,197.23417332513827},
{big_to_small_order_list___,10383,638.1684081130916},
{all_sorted_sublist________,12997,798.8322065150584}]},
{{20000,60},
[{pheap_____________________,1517,100.0},
{gb_tree___________________,2560,168.75411997363216},
{small_to_big_order_list_v2,2729,179.8945286750165},
{small_to_big_order_list___,3947,260.18457481872116},
{big_to_small_order_list___,13099,863.4805537244562},
{all_sorted_sublist________,13600,896.5062623599209}]},
{{20000,70},
[{pheap_____________________,1719,100.0},
{gb_tree___________________,2711,157.70796974985458},
{small_to_big_order_list_v2,2927,170.27341477603258},
{small_to_big_order_list___,4466,259.8022105875509},
{all_sorted_sublist________,14636,851.4252472367655},
{big_to_small_order_list___,15626,909.0168702734148}]},
{{25000,20},
[{pheap_____________________,1022,100.0},
{small_to_big_order_list___,1529,149.60861056751466},
{small_to_big_order_list_v2,2107,206.16438356164383},
{gb_tree___________________,2577,252.15264187866927},
{big_to_small_order_list___,4458,436.2035225048924},
{all_sorted_sublist________,20115,1968.1996086105673}]},
{{25000,30},
[{pheap_____________________,1185,100.0},
{small_to_big_order_list___,2335,197.0464135021097},
{small_to_big_order_list_v2,2561,216.11814345991561},
{gb_tree___________________,2573,217.13080168776372},
{big_to_small_order_list___,6899,582.1940928270042},
{all_sorted_sublist________,21925,1850.2109704641348}]},
{{25000,40},
[{pheap_____________________,1153,100.0},
{small_to_big_order_list_v2,2674,231.9167389418907},
{gb_tree___________________,2743,237.90112749349524},
{small_to_big_order_list___,2945,255.42064180398958},
{big_to_small_order_list___,8707,755.160450997398},
{all_sorted_sublist________,20801,1804.0763226366003}]},
{{25000,50},
[{pheap_____________________,1163,100.0},
{small_to_big_order_list_v2,2777,238.77901977644024},
{gb_tree___________________,2846,244.71195184866724},
{small_to_big_order_list___,3738,321.4101461736887},
{big_to_small_order_list___,12032,1034.5657781599311},
{all_sorted_sublist________,21689,1864.9183147033532}]},
{{25000,60},
[{pheap_____________________,1192,100.0},
{small_to_big_order_list_v2,2927,245.55369127516778},
{gb_tree___________________,2945,247.06375838926172},
{small_to_big_order_list___,4864,408.0536912751678},
{big_to_small_order_list___,14562,1221.6442953020135},
{all_sorted_sublist________,20155,1690.8557046979865}]},
{{25000,70},
[{pheap_____________________,1248,100.0},
{gb_tree___________________,3106,248.8782051282051},
{small_to_big_order_list_v2,3654,292.78846153846155},
{small_to_big_order_list___,5396,432.3717948717949},
{big_to_small_order_list___,19181,1536.9391025641025},
{all_sorted_sublist________,19811,1587.4198717948718}]}]
ok
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Pairing heap implementation is significantly faster.