Skip to content

Instantly share code, notes, and snippets.

@jq2
Created March 17, 2020 16:37
Show Gist options
  • Save jq2/5ba5395bfd5aa1647f8cdc419d3f9e41 to your computer and use it in GitHub Desktop.
Save jq2/5ba5395bfd5aa1647f8cdc419d3f9e41 to your computer and use it in GitHub Desktop.
[ bash ]: Levenshtein distance (#regex, Approximate “fuzzy” matching)
#!/bin/bash
function levenshtein {
if (( $# != 2 )); then
echo "Usage: $0 word1 word2" >&2
elif (( ${#1} < ${#2} )); then
levenshtein "$2" "$1"
else
local str1len=${#1}
local str2len=${#2}
local d
for (( i = 0; i <= (str1len+1)*(str2len+1); i++ )); do
d[i]=0
done
for (( i = 0; i <= str1len; i++ )); do
d[i+0*str1len]=$i
done
for (( j = 0; j <= str2len; j++ )); do
d[0+j*(str1len+1)]=$j
done
for (( j = 1; j <= str2len; j++ )); do
for (( i = 1; i <= str1len; i++ )); do
[ "${1:i-1:1}" = "${2:j-1:1}" ] && local cost=0 || local cost=1
del=$(( d[(i-1)+str1len*j]+1 ))
ins=$(( d[i+str1len*(j-1)]+1 ))
alt=$(( d[(i-1)+str1len*(j-1)]+cost ))
d[i+str1len*j]=$( echo -e "$del\n$ins\n$alt" | sort -n | head -1 )
done
done
echo ${d[str1len+str1len*(str2len)]}
fi
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment