Created
July 19, 2018 14:53
-
-
Save jef-sure/e737dbd52eeef7fcd5a04741d9304608 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env perl | |
| use strict; | |
| use warnings; | |
| use feature qw/say bitwise/; | |
| use experimental 'bitwise'; | |
| use utf8; | |
| use Benchmark ':all'; | |
| use Inline C => <<'END_OF_C_CODE'; | |
| #define IN_RANGE_INC(type,val,beg,end) \ | |
| ((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \ | |
| <= (unsigned type)((unsigned type)(end) - (unsigned type)(beg))) | |
| static inline UV | |
| decode_utf8_cp (unsigned char *s, STRLEN len, STRLEN *clen) | |
| { | |
| if (len >= 2 | |
| && IN_RANGE_INC (char, s[0], 0xc2, 0xdf) | |
| && IN_RANGE_INC (char, s[1], 0x80, 0xbf)) | |
| { | |
| *clen = 2; | |
| return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); | |
| } | |
| else | |
| return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY); | |
| } | |
| SV* first_diff_c(SV* sa, SV *sb) { | |
| STRLEN sa_len, sb_len; | |
| char *sa_ptr = SvPV (sa, sa_len); | |
| char *sb_ptr = SvPV (sb, sb_len); | |
| if(SvUTF8(sa) != SvUTF8(sb)) | |
| croak ("different argument encoding"); | |
| int i; | |
| int min_len = sa_len < sb_len? sa_len: sb_len; | |
| if(SvUTF8(sa)) { | |
| char *sa_end = SvEND(sa) + 1; | |
| char *sb_end = SvEND(sb) + 1; | |
| int cpos; | |
| for(cpos = i = 0; i < min_len; ++cpos) { | |
| STRLEN csa = 1, csb = 1; | |
| UV usa = decode_utf8_cp (sa_ptr + i, sa_end - sa_ptr, &csa); | |
| UV usb = decode_utf8_cp (sb_ptr + i, sb_end - sb_ptr, &csb); | |
| if(usa != usb) | |
| return newSViv(cpos); | |
| i += csa; | |
| } | |
| } else { | |
| for(i = 0; i < min_len; ++i) { | |
| if(sa_ptr[i] != sb_ptr[i]) | |
| return newSViv(i); | |
| } | |
| } | |
| if(sa_len != sb_len) | |
| return newSViv(SvCUR(sa)); | |
| return &PL_sv_undef; | |
| } | |
| END_OF_C_CODE | |
| binmode STDOUT, ':utf8'; | |
| sub first_diff_xor {("$_[0]" ^. "$_[1]") =~ /[^\x00]/ ? $-[0] : undef} | |
| sub first_diff_pos { | |
| my ($a, $b) = | |
| length(${$_[0]}) > length(${$_[1]}) | |
| ? @_ | |
| : ($_[1], $_[0]); | |
| while ($$a =~ m/(.)/g) { | |
| my $c = $1; | |
| return pos($$a) unless $$b =~ m/(.)/g && $c eq $1; | |
| } | |
| return undef; | |
| } | |
| sub first_diff_pos_gc { | |
| my ($a, $b) = | |
| length(${$_[0]}) > length(${$_[1]}) | |
| ? @_ | |
| : ($_[1], $_[0]); | |
| while ($$a =~ m/(.)/gc) { | |
| my $c = $1; | |
| return pos($$a) unless $$b =~ m/(.)/gc && $c eq $1; | |
| } | |
| return undef; | |
| } | |
| my @test = ( | |
| ["cat", "dog"], | |
| ["kater", "katze"], | |
| ["foo", "foobar"], | |
| ["i want to ride my bicycle!", "i want to ride my bicycle!"], | |
| ["Андрей", "Андрий"] | |
| ); | |
| #for my $t (@test) {say "$t->[0] ~~ $t->[1] = " . (first_diff_c(@$t) // "undef")} | |
| #exit; | |
| cmpthese( | |
| 1_000_000, { | |
| first_diff_pos => sub { | |
| first_diff_pos(\$_->[0], \$_->[1]) for @test; | |
| }, | |
| first_diff_pos_gc => sub { | |
| first_diff_pos_gc(\$_->[0], \$_->[1]) for @test; | |
| }, | |
| first_diff_xor => sub { | |
| first_diff_xor(@$_) for @test; | |
| }, | |
| first_diff_c => sub { | |
| first_diff_c(@$_) for @test; | |
| }, | |
| }, | |
| ); | |
| __END__ | |
| Rate first_diff_pos first_diff_xor first_diff_pos_gc first_diff_c | |
| first_diff_pos 189394/s -- -34% -58% -88% | |
| first_diff_xor 288184/s 52% -- -36% -81% | |
| first_diff_pos_gc 448430/s 137% 56% -- -71% | |
| first_diff_c 1538462/s 712% 434% 243% -- | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment