Skip to content

Instantly share code, notes, and snippets.

@jef-sure
Created July 19, 2018 14:53
Show Gist options
  • Select an option

  • Save jef-sure/e737dbd52eeef7fcd5a04741d9304608 to your computer and use it in GitHub Desktop.

Select an option

Save jef-sure/e737dbd52eeef7fcd5a04741d9304608 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use strict;
use warnings;
use feature qw/say bitwise/;
use experimental 'bitwise';
use utf8;
use Benchmark ':all';
use Inline C => <<'END_OF_C_CODE';
#define IN_RANGE_INC(type,val,beg,end) \
((unsigned type)((unsigned type)(val) - (unsigned type)(beg)) \
<= (unsigned type)((unsigned type)(end) - (unsigned type)(beg)))
static inline UV
decode_utf8_cp (unsigned char *s, STRLEN len, STRLEN *clen)
{
if (len >= 2
&& IN_RANGE_INC (char, s[0], 0xc2, 0xdf)
&& IN_RANGE_INC (char, s[1], 0x80, 0xbf))
{
*clen = 2;
return ((s[0] & 0x1f) << 6) | (s[1] & 0x3f);
}
else
return utf8n_to_uvuni (s, len, clen, UTF8_CHECK_ONLY);
}
SV* first_diff_c(SV* sa, SV *sb) {
STRLEN sa_len, sb_len;
char *sa_ptr = SvPV (sa, sa_len);
char *sb_ptr = SvPV (sb, sb_len);
if(SvUTF8(sa) != SvUTF8(sb))
croak ("different argument encoding");
int i;
int min_len = sa_len < sb_len? sa_len: sb_len;
if(SvUTF8(sa)) {
char *sa_end = SvEND(sa) + 1;
char *sb_end = SvEND(sb) + 1;
int cpos;
for(cpos = i = 0; i < min_len; ++cpos) {
STRLEN csa = 1, csb = 1;
UV usa = decode_utf8_cp (sa_ptr + i, sa_end - sa_ptr, &csa);
UV usb = decode_utf8_cp (sb_ptr + i, sb_end - sb_ptr, &csb);
if(usa != usb)
return newSViv(cpos);
i += csa;
}
} else {
for(i = 0; i < min_len; ++i) {
if(sa_ptr[i] != sb_ptr[i])
return newSViv(i);
}
}
if(sa_len != sb_len)
return newSViv(SvCUR(sa));
return &PL_sv_undef;
}
END_OF_C_CODE
binmode STDOUT, ':utf8';
sub first_diff_xor {("$_[0]" ^. "$_[1]") =~ /[^\x00]/ ? $-[0] : undef}
sub first_diff_pos {
my ($a, $b) =
length(${$_[0]}) > length(${$_[1]})
? @_
: ($_[1], $_[0]);
while ($$a =~ m/(.)/g) {
my $c = $1;
return pos($$a) unless $$b =~ m/(.)/g && $c eq $1;
}
return undef;
}
sub first_diff_pos_gc {
my ($a, $b) =
length(${$_[0]}) > length(${$_[1]})
? @_
: ($_[1], $_[0]);
while ($$a =~ m/(.)/gc) {
my $c = $1;
return pos($$a) unless $$b =~ m/(.)/gc && $c eq $1;
}
return undef;
}
my @test = (
["cat", "dog"],
["kater", "katze"],
["foo", "foobar"],
["i want to ride my bicycle!", "i want to ride my bicycle!"],
["Андрей", "Андрий"]
);
#for my $t (@test) {say "$t->[0] ~~ $t->[1] = " . (first_diff_c(@$t) // "undef")}
#exit;
cmpthese(
1_000_000, {
first_diff_pos => sub {
first_diff_pos(\$_->[0], \$_->[1]) for @test;
},
first_diff_pos_gc => sub {
first_diff_pos_gc(\$_->[0], \$_->[1]) for @test;
},
first_diff_xor => sub {
first_diff_xor(@$_) for @test;
},
first_diff_c => sub {
first_diff_c(@$_) for @test;
},
},
);
__END__
Rate first_diff_pos first_diff_xor first_diff_pos_gc first_diff_c
first_diff_pos 189394/s -- -34% -58% -88%
first_diff_xor 288184/s 52% -- -36% -81%
first_diff_pos_gc 448430/s 137% 56% -- -71%
first_diff_c 1538462/s 712% 434% 243% --
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment