Last active
August 23, 2023 19:02
-
-
Save raynirola/d48b754ab113172a33b0032385b24a0b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
abstract class Comparator { | |
protected nGrams: Map<string, number> = new Map() | |
protected abstract process(inputString: string): string | |
protected abstract compute(inputString: string): Map<string, number> | |
public abstract compare(first: string, second: string): number | |
} | |
export class BigramComparator extends Comparator { | |
protected process(inputString: string): string { | |
return inputString.replace(/\s+/g, '') | |
} | |
protected compute(inputString: string): Map<string, number> { | |
const bigrams = new Map<string, number>() | |
for (let i = 0; i < inputString.length - 1; i++) { | |
const bigram = inputString.substring(i, i + 2) | |
const count = bigrams.has(bigram) ? bigrams.get(bigram)! + 1 : 1 | |
bigrams.set(bigram, count) | |
} | |
return bigrams | |
} | |
public compare(first: string, second: string): number { | |
first = this.process(first) | |
second = this.process(second) | |
if (first === second) return 1 | |
if (first.length < 2 || second.length < 2) return 0 | |
this.nGrams = this.compute(first) | |
let intersectionSize = 0 | |
for (let i = 0; i < second.length - 1; i++) { | |
const bigram = second.substring(i, i + 2) | |
const count = this.nGrams.has(bigram) ? this.nGrams.get(bigram)! : 0 | |
if (count > 0) { | |
this.nGrams.set(bigram, count - 1) | |
intersectionSize++ | |
} | |
} | |
return (2.0 * intersectionSize) / (first.length + second.length - 2) | |
} | |
} | |
export class TrigramComparator extends Comparator { | |
protected process(inputString: string): string { | |
return inputString.replace(/\s+/g, '') | |
} | |
protected compute(inputString: string): Map<string, number> { | |
const trigrams = new Map<string, number>() | |
for (let i = 0; i < inputString.length - 2; i++) { | |
const trigram = inputString.substring(i, i + 3) | |
const count = trigrams.has(trigram) ? trigrams.get(trigram)! + 1 : 1 | |
trigrams.set(trigram, count) | |
} | |
return trigrams | |
} | |
public compare(first: string, second: string): number { | |
first = this.process(first) | |
second = this.process(second) | |
if (first === second) return 1 | |
if (first.length < 3 || second.length < 3) return 0 | |
this.nGrams = this.compute(first) | |
let intersectionSize = 0 | |
for (let i = 0; i < second.length - 2; i++) { | |
const trigram = second.substring(i, i + 3) | |
const count = this.nGrams.has(trigram) ? this.nGrams.get(trigram)! : 0 | |
if (count > 0) { | |
this.nGrams.set(trigram, count - 1) | |
intersectionSize++ | |
} | |
} | |
return (2.0 * intersectionSize) / (first.length + second.length - 4) | |
} | |
} | |
interface MatchResult { | |
ratings: { target: string; rating: number }[] | |
bestMatch: { target: string; rating: number } | |
bestMatchIndex: number | |
} | |
export class StringMatcher { | |
constructor(private comparator: Comparator) {} | |
public match(mainString: string, targetStrings: string[]): MatchResult { | |
if (!this.verify(mainString, targetStrings)) { | |
throw new Error('Bad arguments: First argument should be a string, second should be an array of strings') | |
} | |
const ratings: { target: string; rating: number }[] = [] | |
let bestMatchIndex: number = 0 | |
for (let i = 0; i < targetStrings.length; i++) { | |
const currentTargetString: string = targetStrings[i] | |
const currentRating: number = this.comparator.compare(mainString, currentTargetString) | |
ratings.push({ target: currentTargetString, rating: currentRating }) | |
if (currentRating > ratings[bestMatchIndex].rating) { | |
bestMatchIndex = i | |
} | |
} | |
const bestMatch: { target: string; rating: number } = ratings[bestMatchIndex] | |
return { ratings, bestMatch, bestMatchIndex } | |
} | |
private verify(mainString: string, targetStrings: string[]): boolean { | |
if (typeof mainString !== 'string') return false | |
if (!Array.isArray(targetStrings)) return false | |
if (!targetStrings.length) return false | |
if (targetStrings.find(s => typeof s !== 'string')) return false | |
return true | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment