Skip to content

Instantly share code, notes, and snippets.

@AlJohri
Last active June 21, 2016 14:34
Show Gist options
  • Save AlJohri/bdc7a5a0894d6c79d5201f96c11cb593 to your computer and use it in GitHub Desktop.
import sys, random, itertools
# Number of random objects to generate for the demo.
N = 100000
# Field names every generated object carries; indexes are built over every
# non-empty combination of these.
keys = ["a", "b", "c"]


def make_objects(count, key_names):
    """Return ``count`` dicts, each mapping every name in ``key_names`` to a
    random integer in the inclusive range 1..10."""
    return [{k: random.randint(1, 10) for k in key_names} for _ in range(count)]


def key_combinations(key_names):
    """Return every non-empty combination of ``key_names`` as a list of tuples.

    Combinations are ordered by size (single keys first, the full key set
    last), and within one size in the order ``itertools.combinations`` yields.
    """
    combos = []
    for size in range(1, len(key_names) + 1):
        combos += itertools.combinations(key_names, size)
    return combos


def build_index(objects, index):
    """Group ``objects`` by the values they hold for the keys in ``index``.

    Args:
        objects: iterable of dicts; each must contain every key in ``index``.
        index: tuple of key names to index by.

    Returns:
        A dict mapping ``tuple(obj[key] for key in index)`` to a tuple of the
        objects sharing those values. Objects inside a group keep their input
        order, and the dict's keys are inserted in sorted order.

    Raises:
        KeyError: if an object lacks one of the keys in ``index``.
    """
    groups = {}
    # One O(n) pass instead of sorting every object just to feed groupby.
    for obj in objects:
        groups.setdefault(tuple(obj[key] for key in index), []).append(obj)
    # Only the (few) distinct keys are sorted, which keeps the same key order
    # a sort-then-groupby over all objects would have produced.
    return {group_key: tuple(groups[group_key]) for group_key in sorted(groups)}


def main():
    """Run the demo: generate objects, index them every possible way, and look
    one random object up through each index, printing a report as it goes."""
    print("Creating {} random objects with keys {}".format(N, keys))
    objects = make_objects(N, keys)
    # NOTE: sys.getsizeof is shallow -- this is the size of the list's own
    # pointer array, not of the dicts it references.
    print(sys.getsizeof(objects) / 1024, "kilobytes")
    print()

    print("Some Sample Objects:")
    for obj in objects[:10]:
        print(id(obj), obj)
    print("...")
    print()

    all_possible_indices = key_combinations(keys)
    list_of_indexed_objects = []
    print("Creating {} indicies for objects:".format(len(all_possible_indices)))
    for index in all_possible_indices:
        print("Index By:", index, end=" - ")
        indexed_objects = build_index(objects, index)
        # Shallow size again: the hash table only, not the grouped tuples.
        print(len(indexed_objects), "groups", " | ", sys.getsizeof(indexed_objects) / 1024, "kilobytes")
        list_of_indexed_objects.append(indexed_objects)
    print()

    print("Find a random object in objects:")
    random_object = random.choice(objects)
    print(random_object)
    print()

    print("Query for random object using each index")
    for index, indexed_objects in zip(all_possible_indices, list_of_indexed_objects):
        query = tuple(random_object[key] for key in index)
        query_result = indexed_objects[query]
        print(
            "index:", index, "|", "query:", query, "|",
            "number of results:", "|", len(query_result), "|",
            # `in` compares dicts by equality, so any equal-valued object matches.
            "is random object in result?", "|", random_object in query_result)


if __name__ == "__main__":
    main()
Creating 100000 random objects with keys ['a', 'b', 'c']
805.140625 kilobytes
Some Sample Objects:
4303252040 {'c': 2, 'a': 10, 'b': 7}
4316479432 {'c': 5, 'a': 7, 'b': 8}
4316487752 {'c': 9, 'a': 9, 'b': 4}
4303252424 {'c': 10, 'a': 1, 'b': 3}
4316690056 {'c': 1, 'a': 6, 'b': 9}
4316690120 {'c': 3, 'a': 7, 'b': 1}
4316705032 {'c': 10, 'a': 4, 'b': 2}
4316705096 {'c': 5, 'a': 4, 'b': 5}
4316705160 {'c': 1, 'a': 2, 'b': 8}
4316705224 {'c': 7, 'a': 9, 'b': 6}
...
Creating 7 indicies for objects:
Index By: ('a',) - 10 groups | 0.46875 kilobytes
Index By: ('b',) - 10 groups | 0.46875 kilobytes
Index By: ('c',) - 10 groups | 0.46875 kilobytes
Index By: ('a', 'b') - 100 groups | 6.09375 kilobytes
Index By: ('a', 'c') - 100 groups | 6.09375 kilobytes
Index By: ('b', 'c') - 100 groups | 6.09375 kilobytes
Index By: ('a', 'b', 'c') - 1000 groups | 48.09375 kilobytes
Find a random object in objects:
{'c': 1, 'a': 4, 'b': 9}
Query for random object using each index
index: ('a',) | query: (4,) | number of results: | 9901 | is random object in result? | True
index: ('b',) | query: (9,) | number of results: | 10073 | is random object in result? | True
index: ('c',) | query: (1,) | number of results: | 10082 | is random object in result? | True
index: ('a', 'b') | query: (4, 9) | number of results: | 1022 | is random object in result? | True
index: ('a', 'c') | query: (4, 1) | number of results: | 967 | is random object in result? | True
index: ('b', 'c') | query: (9, 1) | number of results: | 1046 | is random object in result? | True
index: ('a', 'b', 'c') | query: (4, 9, 1) | number of results: | 110 | is random object in result? | True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment