Created
February 29, 2024 10:02
-
-
Save fredriccliver/67641ff676263be8a53fd17b4475e216 to your computer and use it in GitHub Desktop.
Calculate utterance similarity using the DTW (dynamic time warping) algorithm.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.spatial.distance import euclidean | |
from fastdtw import fastdtw | |
import numpy as np | |
def calculate_similarity_percentage(distance, max_distance):
    """Convert a distance into a similarity percentage.

    The distance is expressed as a fraction of ``max_distance`` and
    inverted, so a distance of 0 yields 100.0 and a distance equal to
    ``max_distance`` yields 0.0. The result is not clamped: a distance
    larger than ``max_distance`` yields a negative percentage.

    Args:
        distance: Measured distance between the two compared sequences.
        max_distance: Distance that is treated as "nothing in common".

    Returns:
        The similarity as a percentage (float).
    """
    if max_distance == 0:
        # There is no scale to normalise against (e.g. both inputs were
        # empty). Avoid ZeroDivisionError: identical inputs count as a
        # full match, anything else as no match.
        return 100.0 if distance == 0 else 0.0
    return (1 - distance / max_distance) * 100
def main():
    """Print a DTW-based similarity percentage for each sample text pair.

    Every text is turned into the sequence of its character code points,
    the two sequences are aligned with ``fastdtw`` using the Euclidean
    distance, and the resulting distance is scaled into a percentage by
    ``calculate_similarity_percentage``.
    """
    test_sets = [
        ("Not yet. Oh, I can see it. Sorry, I can see it.",
         "Not yet, Oh, I can see it. Sorry I can see."),
        ("test3",
         "test4"),
        ("1234567890",
         "0123456789"),
        ("100%, perfectly matched",
         "100%, perfectly matched"),
        ("100%, perfectly matched",
         "100%, perfectly matche."),
        ("non sementic parts is modified, so it's not p roblem.",
         "non-sementic parts is modified. so it's not a problem."),
        ("I have a lots of experience in NLP. But NLP means Non-Lexical-Problem.",
         "I have a lots of experience in NLP."),
        # Add more test sets as needed
    ]

    # 122 - 32 == ord("z") - ord(" "); used as the largest gap one character
    # pair is expected to contribute. NOTE(review): characters outside that
    # range can push the distance above max_distance and the percentage
    # below zero.
    max_char_gap = 122 - 32
    separator = "-------------------------"

    for text1, text2 in test_sets:
        # Reshape to a column so that each character code is passed to the
        # distance function as a one-element vector.
        codes1 = np.array([ord(char) for char in text1]).reshape(-1, 1)
        codes2 = np.array([ord(char) for char in text2]).reshape(-1, 1)

        # Compute the DTW distance; the warping path is not needed here.
        distance, _ = fastdtw(codes1, codes2, dist=euclidean)

        # Worst case: every position of the longer text differs by the
        # maximum per-character gap.
        max_distance = max(len(text1), len(text2)) * max_char_gap
        similarity_percentage = calculate_similarity_percentage(
            distance, max_distance
        )

        print(separator)
        print(f"Text 1: {text1}")
        print(f"Text 2: {text2}")
        print(f"Similarity Percentage: {similarity_percentage:.2f}%")
        print(separator)
# Run the demo only when the file is executed as a script, so that importing
# the module does not trigger the DTW computation and printing.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment