Skip to content

Instantly share code, notes, and snippets.

@vhxs
Last active June 25, 2023 04:05
Show Gist options
  • Save vhxs/6c01f4093e5492cc606d840dfa052038 to your computer and use it in GitHub Desktop.
Save vhxs/6c01f4093e5492cc606d840dfa052038 to your computer and use it in GitHub Desktop.
Jaccard metric spaces cannot be isometrically embedded in Euclidean space
import numpy as np
from random import sample
# The Jaccard distance is truly a metric: https://arxiv.org/pdf/1612.02696.pdf
def jaccard_distance(set1: set, set2: set):
union_size = len(set1.union(set2))
intersection_size = len(set1.intersection(set2))
return 1 - (intersection_size / union_size)
# Give me some random sets
def random_sets_generator(universe_size=10, set_size=5, num_sets=10):
for _ in range(num_sets):
yield set(sample(range(universe_size), set_size))
def random_vectors_generator(dimension=10, num_vectors=10):
for _ in range(num_vectors):
yield np.random.rand(dimension)
# Yield the distance matrix of randomly generated sets, with the Jaccard metric, as a numpy array
def random_jaccard_metric_space_generator(universe_size=10, set_size=5, num_sets=10):
while True:
# generate random sets
random_sets = [random_set for random_set in random_sets_generator(universe_size, set_size, num_sets)]
# compute distance matrix
matrix_as_list = []
for i in range(num_sets):
matrix_as_list.append([])
for j in range(num_sets):
distance = jaccard_distance(random_sets[i], random_sets[j])
matrix_as_list[i].append(distance)
yield np.array(matrix_as_list), random_sets
def random_euclidean_metric_space_generator(dimension=10, num_vectors=10):
while True:
# generate random sets
random_vectors = [random_set for random_set in random_vectors_generator(dimension, num_vectors)]
# compute distance matrix
matrix_as_list = []
for i in range(num_vectors):
matrix_as_list.append([])
for j in range(num_vectors):
distance = np.linalg.norm(random_vectors[i] - random_vectors[j])**2
matrix_as_list[i].append(distance)
yield np.array(matrix_as_list), random_vectors
# A finite metric space can be embedded in Euclidean space if the following matrix is positive semidefinite
# https://en.wikipedia.org/wiki/Euclidean_distance_matrix#Characterizations
def is_embeddable(distance_matrix):
num_points = distance_matrix.shape[0]
matrix_as_list = []
for i in range(1, num_points):
matrix_as_list.append([])
for j in range(1, num_points):
elt = (distance_matrix[0][i]**2 + distance_matrix[0][j]**2 - distance_matrix[i][j]**2) / 2
matrix_as_list[i-1].append(elt)
is_this_positive_semidefinite = np.array(matrix_as_list)
return np.all(np.linalg.eigvals(is_this_positive_semidefinite) >= 0)
if __name__ == "__main__":
for random_distance_matrix, random_sets in random_jaccard_metric_space_generator():
if not is_embeddable(random_distance_matrix):
print("Counterexample found!")
print(random_sets)
exit()
@vhxs
Copy link
Author

vhxs commented Jun 24, 2023

@bjoernkjoshanssen unclear to me. I just translated the criterion on this Wikipedia page into Python code, which I think I did correctly; it doesn't square the elements of the distance matrix: https://en.wikipedia.org/wiki/Euclidean_distance_matrix#Characterizations

I didn't dig into the derivation of the Schoenberg criterion to understand why it's true (couldn't find a non-pay-walled proof either). Happy to be corrected on any of the above.

@bjoernkjoshanssen
Copy link

At the top of the Wikipedia article you see that "Euclidean distance matrix" actually means the matrix of squared distances. Granted, it's a strange piece of terminology.

@vhxs
Copy link
Author

vhxs commented Jun 25, 2023

Gist updated. Thanks for the correction!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment