Skip to content

Instantly share code, notes, and snippets.

@raynirola
Last active August 23, 2023 19:02
Show Gist options
  • Save raynirola/d48b754ab113172a33b0032385b24a0b to your computer and use it in GitHub Desktop.
Save raynirola/d48b754ab113172a33b0032385b24a0b to your computer and use it in GitHub Desktop.
abstract class Comparator {
protected nGrams: Map<string, number> = new Map()
protected abstract process(inputString: string): string
protected abstract compute(inputString: string): Map<string, number>
public abstract compare(first: string, second: string): number
}
export class BigramComparator extends Comparator {
protected process(inputString: string): string {
return inputString.replace(/\s+/g, '')
}
protected compute(inputString: string): Map<string, number> {
const bigrams = new Map<string, number>()
for (let i = 0; i < inputString.length - 1; i++) {
const bigram = inputString.substring(i, i + 2)
const count = bigrams.has(bigram) ? bigrams.get(bigram)! + 1 : 1
bigrams.set(bigram, count)
}
return bigrams
}
public compare(first: string, second: string): number {
first = this.process(first)
second = this.process(second)
if (first === second) return 1
if (first.length < 2 || second.length < 2) return 0
this.nGrams = this.compute(first)
let intersectionSize = 0
for (let i = 0; i < second.length - 1; i++) {
const bigram = second.substring(i, i + 2)
const count = this.nGrams.has(bigram) ? this.nGrams.get(bigram)! : 0
if (count > 0) {
this.nGrams.set(bigram, count - 1)
intersectionSize++
}
}
return (2.0 * intersectionSize) / (first.length + second.length - 2)
}
}
export class TrigramComparator extends Comparator {
protected process(inputString: string): string {
return inputString.replace(/\s+/g, '')
}
protected compute(inputString: string): Map<string, number> {
const trigrams = new Map<string, number>()
for (let i = 0; i < inputString.length - 2; i++) {
const trigram = inputString.substring(i, i + 3)
const count = trigrams.has(trigram) ? trigrams.get(trigram)! + 1 : 1
trigrams.set(trigram, count)
}
return trigrams
}
public compare(first: string, second: string): number {
first = this.process(first)
second = this.process(second)
if (first === second) return 1
if (first.length < 3 || second.length < 3) return 0
this.nGrams = this.compute(first)
let intersectionSize = 0
for (let i = 0; i < second.length - 2; i++) {
const trigram = second.substring(i, i + 3)
const count = this.nGrams.has(trigram) ? this.nGrams.get(trigram)! : 0
if (count > 0) {
this.nGrams.set(trigram, count - 1)
intersectionSize++
}
}
return (2.0 * intersectionSize) / (first.length + second.length - 4)
}
}
interface MatchResult {
ratings: { target: string; rating: number }[]
bestMatch: { target: string; rating: number }
bestMatchIndex: number
}
export class StringMatcher {
constructor(private comparator: Comparator) {}
public match(mainString: string, targetStrings: string[]): MatchResult {
if (!this.verify(mainString, targetStrings)) {
throw new Error('Bad arguments: First argument should be a string, second should be an array of strings')
}
const ratings: { target: string; rating: number }[] = []
let bestMatchIndex: number = 0
for (let i = 0; i < targetStrings.length; i++) {
const currentTargetString: string = targetStrings[i]
const currentRating: number = this.comparator.compare(mainString, currentTargetString)
ratings.push({ target: currentTargetString, rating: currentRating })
if (currentRating > ratings[bestMatchIndex].rating) {
bestMatchIndex = i
}
}
const bestMatch: { target: string; rating: number } = ratings[bestMatchIndex]
return { ratings, bestMatch, bestMatchIndex }
}
private verify(mainString: string, targetStrings: string[]): boolean {
if (typeof mainString !== 'string') return false
if (!Array.isArray(targetStrings)) return false
if (!targetStrings.length) return false
if (targetStrings.find(s => typeof s !== 'string')) return false
return true
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment