rohjay · May 11, 2023 15:37
diff --git a/func.compare_strings.php b/func.compare_strings.php
 <?php

 // I PHP-ified Ben's work from here: https://github.com/ben-yocum/gestalt-pattern-matcher (thanks for sharing Ben!)
 // made a few modifications and added comments to better understand how this works. Enjoy =]
 function compare_strings(string $input1, string $input2, bool $case_insensitive = true): float {
    if ( $input1 == '' || $input2 == '' ) {
        return 0.0;
    }

    // Case insensitive is the preferred, but you do you
    if ( $case_insensitive ) {
        $input1 = strtolower($input1);
        $input2 = strtolower($input2);
    }

    // Build the foundation of the stack with our comparison strings...
    $stack = [
        $input1,
        $input2
    ];

    // Set the "score" at zero.
    $score = 0;

    // I added this in here due to a lack of understanding how this worked. Keeping it 😜
    $prevention_count = 1000;

    while( count($stack) > 0 && $prevention_count > 0 ) {
        // Pop the comparison strings off the top...
        $string2 = array_pop($stack);
        $string1 = array_pop($stack);

        // Use a split array of the strings so I don't have to substr with offsets all over the place.
        $string1_split = str_split($string1);
        $string2_split = str_split($string2);

        // Grab the string length once
        $str1_len = strlen($string1);
        $str2_len = strlen($string2);

        // Set up the indicators for sameness
        $sequence_length = 0;
        $sequence_index1 = -1;
        $sequence_index2 = -1;

        // Loop the length of string 1
        for($i = 0; $i < $str1_len; $i++) {
            // Loop the length of string 2
            for($j = 0; $j < $str2_len; $j++) {
                // Our wildcard to track from our current position out to wherever the strings stop matching
                $k = 0;
                while(isset($string1_split[$i+$k]) && isset($string2_split[$j+$k]) && $string1_split[$i+$k] === $string2_split[$j+$k]) {
                    ++$k;
                }

                // Now that we've seen how far out $k gets us... Let's see if this is streak-worthy ;)
                if($k > $sequence_length) {
                    $sequence_length = $k; // Our wildcard that tracks how far out a same-streak goes
                    $sequence_index1 = $i; // Where the streak starts in string 1
                    $sequence_index2 = $j; // Where the streak starts in string 2
                }
            }
        }

        // We've looped through both strings independently and found ONE similarity, let's tally the score and add more
        // things to look at around where this was.
        if($sequence_length !== 0) {
            // Tally the score based off the sequence length (remember our wildcard $k from before? This is the biggest
            // we saw of that)
            $score += $sequence_length * 2;

            // If our matching sequence didn't start at the beginning of either string, there could be matches here yet.
            if( $sequence_index1 !== 0 && $sequence_index2 !== 0 ) {
                $stack[] = substr($string1, 0, $sequence_index1);
                $stack[] = substr($string2, 0, $sequence_index2);
            }

            // If our matching sequence didn't butt up to the end of either string, there are still potential matches.
            if(
                $sequence_index1 + $sequence_length !== $str1_len
                && $sequence_index2 + $sequence_length !== $str2_len
            ) {
                $stack[] = substr($string1, $sequence_index1 + $sequence_length, $str1_len);
                $stack[] = substr($string2, $sequence_index2 + $sequence_length, $str2_len);
            }
        }

        // Because I never trust this will stop. Never.
        --$prevention_count;
    }

    // This gives us a relative score normalized by the length of the two strings.
    $strlengths = strlen($input1) + strlen($input2);
    $rating = $score / ($strlengths != 0 ? $strlengths : 1); // Habit. Never divide a goose egg.

    // I multiply by 100 here and round to the 5th decimal to give me a Percentage figure. For instance:
    // 12.34567% match is not a great match. 98.76543% match is pretty good! An exact match is 100.
    return round($rating * 100, 5);
 }
	<?php

	// I PHP-ified Ben's work from here: https://github.com/ben-yocum/gestalt-pattern-matcher (thanks for sharing Ben!)
	// made a few modifications and added comments to better understand how this works. Enjoy =]
	function compare_strings(string $input1, string $input2, bool $case_insensitive = true): float {
	if ( $input1 == '' \|\| $input2 == '' ) {
	return 0.0;
	}

	// Case insensitive is the preferred, but you do you
	if ( $case_insensitive ) {
	$input1 = strtolower($input1);
	$input2 = strtolower($input2);
	}

	// Build the foundation of the stack with our comparison strings...
	$stack = [
	$input1,
	$input2
	];

	// Set the "score" at zero.
	$score = 0;

	// I added this in here due to a lack of understanding how this worked. Keeping it 😜
	$prevention_count = 1000;

	while( count($stack) > 0 && $prevention_count > 0 ) {
	// Pop the comparison strings off the top...
	$string2 = array_pop($stack);
	$string1 = array_pop($stack);

	// Use a split array of the strings so I don't have to substr with offsets all over the place.
	$string1_split = str_split($string1);
	$string2_split = str_split($string2);

	// Grab the string length once
	$str1_len = strlen($string1);
	$str2_len = strlen($string2);

	// Set up the indicators for sameness
	$sequence_length = 0;
	$sequence_index1 = -1;
	$sequence_index2 = -1;

	// Loop the length of string 1
	for($i = 0; $i < $str1_len; $i++) {
	// Loop the length of string 2
	for($j = 0; $j < $str2_len; $j++) {
	// Our wildcard to track from our current position out to wherever the strings stop matching
	$k = 0;
	while(isset($string1_split[$i+$k]) && isset($string2_split[$j+$k]) && $string1_split[$i+$k] === $string2_split[$j+$k]) {
	++$k;
	}

	// Now that we've seen how far out $k gets us... Let's see if this is streak-worthy ;)
	if($k > $sequence_length) {
	$sequence_length = $k; // Our wildcard that tracks how far out a same-streak goes
	$sequence_index1 = $i; // Where the streak starts in string 1
	$sequence_index2 = $j; // Where the streak starts in string 2
	}
	}
	}

	// We've looped through both strings independently and found ONE similarity, let's tally the score and add more
	// things to look at around where this was.
	if($sequence_length !== 0) {
	// Tally the score based off the sequence length (remember our wildcard $k from before? This is the biggest
	// we saw of that)
	$score += $sequence_length * 2;

	// If our matching sequence didn't start at the beginning of either string, there could be matches here yet.
	if( $sequence_index1 !== 0 && $sequence_index2 !== 0 ) {
	$stack[] = substr($string1, 0, $sequence_index1);
	$stack[] = substr($string2, 0, $sequence_index2);
	}

	// If our matching sequence didn't butt up to the end of either string, there are still potential matches.
	if(
	$sequence_index1 + $sequence_length !== $str1_len
	&& $sequence_index2 + $sequence_length !== $str2_len
	) {
	$stack[] = substr($string1, $sequence_index1 + $sequence_length, $str1_len);
	$stack[] = substr($string2, $sequence_index2 + $sequence_length, $str2_len);
	}
	}

	// Because I never trust this will stop. Never.
	--$prevention_count;
	}

	// This gives us a relative score normalized by the length of the two strings.
	$strlengths = strlen($input1) + strlen($input2);
	$rating = $score / ($strlengths != 0 ? $strlengths : 1); // Habit. Never divide a goose egg.

	// I multiply by 100 here and round to the 5th decimal to give me a Percentage figure. For instance:
	// 12.34567% match is not a great match. 98.76543% match is pretty good! An exact match is 100.
	return round($rating * 100, 5);
	}