Created
September 16, 2014 04:23
-
-
Save ken39arg/206086121d3b3e4bcaca to your computer and use it in GitHub Desktop.
重複無しの重み付きランダム抽選をする際のベンチ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use common::sense; | |
| use Benchmark qw(:all); | |
| use Sub::Rate::NoMaxRate; | |
| use List::MoreUtils qw(none any); | |
| my @putturn1; | |
| push @putturn1, [ 100000, "a100000-1" ]; | |
| push @putturn1, map { [ 5, "a5-$_" ] } (1..50); | |
| push @putturn1, map { [ 3, "a3-$_" ] } (1..100); | |
| push @putturn1, map { [ 1, "a1-$_" ] } (1..1000); | |
| my @putturn2; | |
| push @putturn2, [ 10, "a10-1" ]; | |
| push @putturn2, map { [ 5, "b5-$_" ] } (1..50); | |
| push @putturn2, map { [ 3, "b3-$_" ] } (1..100); | |
| push @putturn2, map { [ 1, "b1-$_" ] } (1..1000); | |
| sub after_check { | |
| my ( $putturn, $num ) = @_; | |
| my $rate = Sub::Rate::NoMaxRate->new; | |
| for my $p ( @$putturn ) { | |
| $rate->add( $p->[0] => sub { $p->[1] } ); | |
| } | |
| my $func = $rate->generate; | |
| my @result; | |
| while ( @result < $num ) { | |
| my $v = $func->(); | |
| if ( none {$_ eq $v} @result ) { | |
| push @result, $v; | |
| } | |
| } | |
| return \@result; | |
| } | |
| sub before_check { | |
| my ( $putturn, $num ) = @_; | |
| my $rate = Sub::Rate::NoMaxRate->new; | |
| my @result; | |
| while ( @result < $num ) { | |
| $rate->clear; | |
| for my $p (@$putturn) { | |
| next if any {$_ eq $p->[1]} @result; | |
| $rate->add( $p->[0] => sub { $p->[1] } ); | |
| } | |
| push @result, $rate->generate->(); | |
| } | |
| return \@result; | |
| } | |
| # say join(":", @{ before_check( \@putturn2, 10 ) } ); | |
| # say join(":", @{ before_check( \@putturn1, 10 ) } ); | |
| # | |
| # say join(":", @{ after_check( \@putturn2, 10 ) } ); | |
| # say join(":", @{ after_check( \@putturn1, 10 ) } ); | |
| # | |
| # exit; | |
| my $bentch_result = timethese( 10, { | |
| 'after balance' => sub { | |
| after_check( \@putturn2, 20 ); | |
| }, | |
| 'after unbalance' => sub { | |
| after_check( \@putturn1, 20 ); | |
| }, | |
| 'before balance' => sub { | |
| before_check( \@putturn2, 20 ); | |
| }, | |
| 'before unbalance' => sub { | |
| before_check( \@putturn1, 20 ); | |
| }, | |
| }); | |
| cmpthese $bentch_result; | |
| __DATA__ | |
| Benchmark: timing 10 iterations of after balance, after unbalance, before balance, before unbalance... | |
| after balance: 1 wallclock secs ( 0.90 usr + 0.00 sys = 0.90 CPU) @ 11.11/s (n=10) | |
| after unbalance: 1 wallclock secs ( 0.99 usr + 0.00 sys = 0.99 CPU) @ 10.10/s (n=10) | |
| before balance: 18 wallclock secs (18.47 usr + 0.01 sys = 18.48 CPU) @ 0.54/s (n=10) | |
| before unbalance: 19 wallclock secs (18.55 usr + 0.02 sys = 18.57 CPU) @ 0.54/s (n=10) | |
| s/iter before unbalance before balance after unbalance after balance | |
| before unbalance 1.86 -- -0% -95% -95% | |
| before balance 1.85 0% -- -95% -95% | |
| after unbalance 9.90e-02 1776% 1767% -- -9% | |
| after balance 9.00e-02 1963% 1953% 10% -- | |
Author
Author
putturn1を5000000にしても下記の通り。
Benchmark: timing 10 iterations of after balance, after unbalance, before balance, before unbalance...
after balance: 1 wallclock secs ( 0.94 usr + 0.01 sys = 0.95 CPU) @ 10.53/s (n=10)
after unbalance: 4 wallclock secs ( 4.29 usr + 0.00 sys = 4.29 CPU) @ 2.33/s (n=10)
before balance: 18 wallclock secs (18.52 usr + 0.01 sys = 18.53 CPU) @ 0.54/s (n=10)
before unbalance: 20 wallclock secs (19.37 usr + 0.02 sys = 19.39 CPU) @ 0.52/s (n=10)
s/iter before unbalance before balance after unbalance after balance
before unbalance 1.94 -- -4% -78% -95%
before balance 1.85 5% -- -77% -95%
after unbalance 0.429 352% 332% -- -78%
after balance 9.50e-02 1941% 1851% 352% --
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
抽出数に対して少ない要素が圧倒的な重みを占める場合、ヤバいんじゃないのという指摘があったけど、
Sub::Rate::NoMaxRateを使った場合抽選の度に重複を除いて再生成するよりも、結果から重複を除いた方が全然早かった