Last active
January 27, 2016 20:40
-
-
Save jtribble/f689dc0dc89b854d0775 to your computer and use it in GitHub Desktop.
Implementation of Levenshtein distance algorithm to compute string similarity. See: https://en.wikipedia.org/wiki/Levenshtein_distance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// | |
// Compute string similarity using Levenshtein distance algorithm | |
// | |
// See: https://en.wikipedia.org/wiki/Levenshtein_distance | |
// | |
/** | |
* @param {string} a - The first string | |
* @param {string} b - The second string | |
* @return {number} - An integer representation of the difference | |
*/ | |
function stringDistance(a, b) { | |
// handle base case (one or more empty strings) | |
if (a.length === 0 || b.length === 0) { | |
return Math.max(a.length, b.length); | |
} | |
// fill matrix of size (a+1)(b+1) with zeros | |
// (stores # of changes needed to make strings equal at each char) | |
let matrix = []; | |
for (let i = 0; i <= a.length; i++) { | |
matrix[i] = []; | |
for (let j = 0; j <= b.length; j++) { | |
matrix[i][j] = 0; | |
} | |
} | |
// populate x and y axis | |
for (let i = 1; i <= a.length; i++) { | |
matrix[i][0] = i; | |
} | |
for (let j = 1; j <= b.length; j++) { | |
matrix[0][j] = j; | |
} | |
// calculate distance per char | |
for (let j = 1; j <= b.length; j++) { | |
for (let i = 1; i <= a.length; i++) { | |
let cost = (a[i] === b[j]) ? 0 : 1; | |
matrix[i][j] = Math.min( | |
// deletion | |
matrix[i - 1][j] + 1, | |
// insertion | |
matrix[i][j - 1] + 1, | |
// substitution | |
matrix[i - 1][j - 1] + cost | |
); | |
} | |
} | |
return matrix[a.length][b.length]; | |
} | |
// | |
// Example tests using Mocha + Chai: | |
// | |
import chai from 'chai'; | |
const should = chai.should(); | |
describe('stringSimilarity()', () => { | |
it('should return 0 for same strings', () => { | |
stringSimilarity('testing', 'testing').should.equal(0); | |
}); | |
it('should handle empty strings', () => { | |
stringSimilarity('', '').should.equal(0); | |
stringSimilarity('1', '').should.equal(1); | |
stringSimilarity('', 'four').should.equal(4); | |
}); | |
it('should return distance for different strings', () => { | |
stringSimilarity('testing', 'testing1').should.equal(1); | |
stringSimilarity('testing', '2testing').should.equal(1); | |
stringSimilarity('testing', '3testing3').should.equal(2); | |
stringSimilarity('testing', '4te4st4in4g').should.equal(4); | |
}); | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment