Last active
March 14, 2024 15:24
-
-
Save xiejuncs/6f2694564263907dd09743f61d5377a9 to your computer and use it in GitHub Desktop.
KNN simple Python code example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# `multiple` controls how far the vector lies from the origin (the query
# vector in this example): larger values give more distant vectors.
def get_vector(dimension, multiple):
    """Return a vector whose components all equal ``multiple * 0.000001``.

    Args:
        dimension: Number of components in the returned list.
        multiple: Scale factor applied to the fixed step ``0.000001``.

    Returns:
        A new list of length ``dimension`` (empty when ``dimension`` is
        zero or negative, as ``range`` yields nothing in that case).
    """
    return [multiple * 0.000001 for _ in range(dimension)]
def get_all_zero_vector(dimension):
    """Return a list of ``dimension`` integer zeros.

    Args:
        dimension: Number of components in the returned list.

    Returns:
        A new list containing ``dimension`` copies of ``0`` (empty when
        ``dimension`` is zero or negative).
    """
    # Repetition is safe here because the repeated element is an immutable int.
    return [0] * dimension
def l2_squared_distance(first, second):
    """Return the squared L2 (Euclidean) distance between two vectors.

    Args:
        first: Sequence of numbers.
        second: Sequence of numbers with the same length as ``first``.

    Returns:
        The sum of squared component-wise differences as a float
        (``0.0`` for two empty vectors).

    Raises:
        AssertionError: If the two vectors differ in length. Note that
            assertions are skipped when Python runs with ``-O``.
    """
    assert len(first) == len(second), "vectors must have the same length"
    distance = 0.0
    # Accumulate with an explicit loop so the floating-point summation order
    # (and therefore the exact result) stays a plain left-to-right addition.
    for a, b in zip(first, second):
        diff = a - b
        distance += diff * diff
    return distance
def knn(num_of_vectors, dimension, num_of_items):
    """Run a brute-force nearest-neighbour search on a synthetic data set.

    The query is the all-zero vector. Vector ``i`` of the data set is
    ``get_vector(dimension, i)``, so every candidate is scored against the
    query with ``l2_squared_distance`` and the closest ones are returned.

    Args:
        num_of_vectors: Number of synthetic vectors to generate and score.
        dimension: Number of components in each vector.
        num_of_items: Upper bound of the slice taken from the sorted results.

    Returns:
        A list of ``(id, squared_distance)`` tuples ordered by ascending
        distance, sliced as ``[0:num_of_items]``.
    """
    # Plain lists are used instead of numpy.array for illustration purposes.
    query_vector = get_all_zero_vector(dimension)
    # Keep only the id and its distance to the query; the vector itself is
    # not needed once it has been scored.
    scored = [
        (vector_id,
         l2_squared_distance(query_vector, get_vector(dimension, vector_id)))
        for vector_id in range(num_of_vectors)
    ]
    # Sort by distance; list.sort is stable, so ties keep ascending id order.
    scored.sort(key=lambda pair: pair[1])
    return scored[0:num_of_items]
if __name__ == '__main__':
    # Demo run: score 1000 synthetic 128-dimensional vectors and print the
    # 10 entries closest to the all-zero query vector.
    res = knn(1000, 128, 10)
    formatted_text = "id: {id}, distance: {distance}"
    for item_id, distance in res:
        print(formatted_text.format(id=item_id, distance=distance))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment