Skip to content

Instantly share code, notes, and snippets.

@MasterDuke17
Created May 1, 2025 21:13
Show Gist options
  • Save MasterDuke17/d8403e036ab778bcaa47310dcdd89b56 to your computer and use it in GitHub Desktop.
Save MasterDuke17/d8403e036ab778bcaa47310dcdd89b56 to your computer and use it in GitHub Desktop.
use v6;
use nqp;
unit module Text::Diff::Sift4;
sub sift4(Str:D() $s1, Str:D() $s2, Int $maxOffset = 100, Int $maxDistance = 100 --> Int) is export {
my int $l1 = nqp::chars($s1);
my int $l2 = nqp::chars($s2);
return $l2 unless $l1;
return $l1 unless $l2;
my int $c1;
my int $c2;
my int $lcss;
my int $local_cs;
my int $trans;
my int $max_offset = $maxOffset;
my int $max_distance = $maxDistance;
my $offset_arr := nqp::list_i;
my int $isTrans;
my int $i;
while nqp::islt_i($c1, $l1) && nqp::islt_i($c2, $l2) {
if nqp::iseq_i(nqp::ordat($s1, $c1), nqp::ordat($s2, $c2)) {
nqp::stmts(
($isTrans = 0),
($i = 0),
($local_cs = nqp::add_i($local_cs, 1)),
nqp::while(nqp::islt_i($i, nqp::elems($offset_arr)),
nqp::stmts(
(my int $_trans = nqp::atpos_i($offset_arr, $i)),
(my int $_c1 = nqp::atpos_i($offset_arr, nqp::add_i($i, 1))),
(my int $_c2 = nqp::atpos_i($offset_arr, nqp::add_i($i, 2))),
nqp::if(nqp::isle_i($c1, $_c1) || nqp::isle_i($c2, $_c2),
nqp::stmts(
($isTrans = nqp::isge_i(nqp::abs_i(nqp::sub_i($c2, $c1)), nqp::abs_i(nqp::sub_i($_c2, $_c1)))),
nqp::if($isTrans,
($trans = nqp::add_i($trans, 1)),
nqp::unless($_trans,
nqp::stmts(nqp::bindpos_i($offset_arr, $i, 1),
($trans = nqp::add_i($trans, 1))))),
($i = 2147483647)),
nqp::if((nqp::isgt_i($c1, $_c2) && nqp::isgt_i($c2, $_c1)),
nqp::splice($offset_arr, nqp::list, $i, 3),
($i = nqp::add_i($i, 3)))))),
nqp::push_i($offset_arr, $isTrans),
nqp::push_i($offset_arr, $c1),
nqp::push_i($offset_arr, $c2))
} else {
nqp::stmts(
($lcss = nqp::add_i($lcss, $local_cs)),
($local_cs = 0),
nqp::if(nqp::isne_i($c1, $c2), ($c1 = $c2 = nqp::isle_i($c1, $c2) ?? $c1 !! $c2)),
($i = 0),
nqp::while(
nqp::islt_i($i, $max_offset) && (nqp::islt_i(nqp::add_i($c1, $i), $l1) ||
nqp::islt_i(nqp::add_i($c2, $i), $l2)),
nqp::stmts(
nqp::if(nqp::islt_i(nqp::add_i($c1, $i), $l1) &&
nqp::iseq_i(nqp::ordat($s1, nqp::add_i($c1, $i)),
nqp::ordat($s2, $c2)),
nqp::stmts(
($c1 = nqp::sub_i(nqp::add_i($c1, $i), 1)),
($c2 = nqp::sub_i($c2, 1)),
($i = 2147483647))),
nqp::if(nqp::islt_i(nqp::add_i($c2, $i), $l2) &&
nqp::iseq_i(nqp::ordat($s1, $c1), nqp::ordat($s2, nqp::add_i($c2, $i))),
nqp::stmts(
($c2 = nqp::sub_i(nqp::add_i($c2, $i), 1)),
($c1 = nqp::sub_i($c1, 1)),
($i = 2147483647))),
($i = nqp::add_i($i, 1)))))
}
$c1 = nqp::add_i($c1, 1);
$c2 = nqp::add_i($c2, 1);
nqp::if(nqp::isge_i( nqp::add_i(nqp::sub_i((nqp::isge_i($c1, $c2) ?? $c1 !! $c2), $lcss), $trans), $max_distance),
nqp::if($max_distance, return nqp::add_i(nqp::sub_i((nqp::isge_i($c1, $c2) ?? $c1 !! $c2), $lcss), $trans)));
nqp::if(nqp::isge_i($c1, $l1) || nqp::isge_i($c2, $l2),
nqp::stmts(
($lcss = nqp::add_i($lcss, $local_cs)),
($local_cs = 0),
($c1 = $c2 = (nqp::isle_i($c1, $c2) ?? $c1 !! $c2)))
);
}
$lcss = nqp::add_i($lcss, $local_cs);
nqp::add_i(nqp::sub_i((nqp::isge_i($l1, $l2) ?? $l1 !! $l2), $lcss), $trans)
}
# vim: ft=perl6
use v6;
use nqp;
unit module Text::Diff::Sift4;
class Offset {
has int $.c1 is built(:bind);
has int $.c2 is built(:bind);
has Bool $.trans is rw is built(:bind);
}
sub sift4(str $s1, str $s2, int $maxOffset = 100, int $maxDistance = 100 --> int) is export {
my int $l1 = chars($s1);
my int $l2 = chars($s2);
return $l2 unless $l1;
return $l1 unless $l2;
my int $c1;
my int $c2;
my int $lcss;
my int $local_cs;
my int $trans;
my @offset_arr;
my int $i;
my int $tempDistance;
my int $ofs-c1;
my int $ofs-c2;
my Offset $ofs;
while $c1 < $l1 and $c2 < $l2 {
if nqp::ordat($s1, $c1) == nqp::ordat($s2, $c2) {
++$local_cs;
my Bool $isTrans = False;
$i = 0;
while $i < @offset_arr.elems {
$ofs := @offset_arr[$i];
$ofs-c1 = $ofs.c1;
$ofs-c2 = $ofs.c2;
if $c1 <= $ofs-c1 or $c2 <= $ofs-c2 {
$isTrans = abs($c2 - $c1) >= abs($ofs-c2 - $ofs-c1);
if $isTrans {
++$trans;
} else {
if !$ofs.trans {
$ofs.trans = True;
++$trans;
}
}
last;
} else {
if $c1 > $ofs-c2 and $c2 > $ofs-c1 {
@offset_arr.splice($i, 1);
} else {
++$i;
}
}
}
@offset_arr.push(Offset.new(:$c1, :$c2, trans => $isTrans));
} else {
$lcss += $local_cs;
$local_cs = 0;
if $c1 != $c2 {
$c1 = $c2 = ($c1 min $c2);
}
loop ($i = 0; $i < $maxOffset and ($c1 + $i < $l1 or $c2 + $i < $l2); ++$i) {
if ($c1 + $i < $l1) and nqp::ordat($s1, $c1 + $i) == nqp::ordat($s2, $c2) {
$c1 += $i - 1;
--$c2;
last;
}
if ($c2 + $i < $l2) and nqp::ordat($s1, $c1) == nqp::ordat($s2, $c2 + $i) {
$c2 += $i - 1;
--$c1;
last;
}
}
}
++$c1;
++$c2;
if $maxDistance {
$tempDistance = ($c1 max $c2) - $lcss + $trans;
return $tempDistance if $tempDistance >= $maxDistance;
}
if $c1 >= $l1 or $c2 >= $l2 {
$lcss += $local_cs;
$local_cs = 0;
$c1 = $c2 = ($c1 min $c2);
}
}
$lcss += $local_cs;
return ($l1 max $l2) - $lcss + $trans;
}
# vim: ft=perl6
@lizmat
Copy link

lizmat commented May 1, 2025

Why is $IsTrans a Bool and not an int?

@MasterDuke17
Copy link
Author

  1. Are the returns in line 16/17 necessary?

  2. Lose the final return

  3. Algorithmically speaking: would it make sense to split the cases of first string larger / smaller than second string into separate parts, so you'd only need to check on boundary?

  4. Why is $IsTrans a Bool and not an int?

  1. not sure yet, i'll check
  2. right
  3. it's been about 9 years since i actually looked at the algorithm, but i'll think about it (or pass the question on to the original author)
  4. because that's how it was in the original JavaScript i ported from, but yeah, changing it (and the $.trans) is slightly faster

@MasterDuke17
Copy link
Author

3. Algorithmically speaking: would it make sense to split the cases of first string larger / smaller than second string into separate parts, so you'd only need to check on boundary?

Hm. I haven't gotten into the details of the algorithm yet, so I'm not actually sure what I'm about to say makes sense, but if you split them up maybe it wouldn't correctly track a character moving across that boundary?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment