Created
March 10, 2025 10:08
-
-
Save raynoppe/771bf2b141fbd34f78d780a171ee1fff to your computer and use it in GitHub Desktop.
Levenshtein Distance and Spelling Correction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Calculate Levenshtein distance between two strings | |
* @param {string} str1 - First string | |
* @param {string} str2 - Second string | |
* @returns {number} - The Levenshtein distance | |
*/ | |
function levenshteinDistance(str1, str2) { | |
// Create matrix | |
const matrix = []; | |
// Initialize matrix | |
for (let i = 0; i <= str1.length; i++) { | |
matrix[i] = [i]; | |
} | |
for (let j = 0; j <= str2.length; j++) { | |
matrix[0][j] = j; | |
} | |
// Fill matrix | |
for (let i = 1; i <= str1.length; i++) { | |
for (let j = 1; j <= str2.length; j++) { | |
if (str1[i-1] === str2[j-1]) { | |
matrix[i][j] = matrix[i-1][j-1]; | |
} else { | |
matrix[i][j] = Math.min( | |
matrix[i-1][j-1] + 1, // substitution | |
matrix[i][j-1] + 1, // insertion | |
matrix[i-1][j] + 1 // deletion | |
); | |
} | |
} | |
} | |
return matrix[str1.length][str2.length]; | |
} | |
/** | |
* Find the closest match for a word from a dictionary | |
* @param {string} word - Word to correct | |
* @param {string[]} dictionary - List of correct words | |
* @param {number} maxDistance - Maximum Levenshtein distance to consider as a match | |
* @returns {string|null} - Closest match or null if no match found | |
*/ | |
function findClosestMatch(word, dictionary, maxDistance = 2) { | |
if (!word) return null; | |
// Check for exact match first | |
if (dictionary.includes(word)) return word; | |
let closestWord = null; | |
let minDistance = Infinity; | |
// Compare with each word in the dictionary | |
for (const dictWord of dictionary) { | |
// Skip words with a big length difference for optimization | |
if (Math.abs(dictWord.length - word.length) > maxDistance) continue; | |
const distance = levenshteinDistance(word.toLowerCase(), dictWord.toLowerCase()); | |
if (distance < minDistance) { | |
minDistance = distance; | |
closestWord = dictWord; | |
} | |
} | |
// Only return a suggestion if the distance is below the threshold | |
return minDistance <= maxDistance ? closestWord : null; | |
} | |
/** | |
* Preserve case when applying corrections | |
* @param {string} original - Original word | |
* @param {string} correction - Corrected word | |
* @returns {string} - Correction with preserved case | |
*/ | |
function preserveCase(original, correction) { | |
if (!correction) return original; | |
// If original is all uppercase, return correction in uppercase | |
if (original === original.toUpperCase()) { | |
return correction.toUpperCase(); | |
} | |
// If original is capitalized, capitalize the correction | |
if (original[0] === original[0].toUpperCase() && | |
original.slice(1) === original.slice(1).toLowerCase()) { | |
return correction[0].toUpperCase() + correction.slice(1).toLowerCase(); | |
} | |
// Otherwise return the correction as-is | |
return correction; | |
} | |
/** | |
* Correct a word based on a dictionary | |
* @param {string} word - Word to correct | |
* @param {string[]} dictionary - Dictionary of correct words | |
* @param {number} maxDistance - Maximum edit distance to consider | |
* @returns {string} - Corrected word or original if no correction found | |
*/ | |
function correctWord(word, dictionary, maxDistance = 2) { | |
const correction = findClosestMatch(word, dictionary, maxDistance); | |
return correction ? preserveCase(word, correction) : word; | |
} | |
/** | |
* Extract JSON from error string in format "Error: {JSON}" | |
* @param {Error|string} error - Error object or string | |
* @returns {object} - Extracted JSON object | |
*/ | |
function extractErrorJSON(error) { | |
// Default values | |
const defaultResult = { | |
message: "Unknown error", | |
code: "UNKNOWN" | |
}; | |
// Convert to string if needed | |
const errorString = typeof error === 'string' ? error : String(error); | |
// Check if it matches our expected format "Error: {JSON}" | |
const match = errorString.match(/Error:\s*(\{.*\})/); | |
if (!match) return defaultResult; | |
try { | |
// Parse the JSON portion | |
const jsonString = match[1]; | |
const errorData = JSON.parse(jsonString); | |
return errorData || defaultResult; | |
} catch (e) { | |
// JSON parsing failed | |
console.error("Failed to parse error JSON:", e); | |
return defaultResult; | |
} | |
} | |
// Example usage: | |
// const dictionary = ["message", "code", "error", "exception", "upstream", "service"]; | |
// const misspelledWord = "messige"; | |
// console.log(correctWord(misspelledWord, dictionary)); // "message" | |
// const error = new Error('Error: {"message":"Upstream service exception","code":"VMO2-FMC-10004","errorSet":"create-order"}'); | |
// console.log(extractErrorJSON(error)); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment