Skip to content

Instantly share code, notes, and snippets.

@djinn
Created December 19, 2019 01:30
Show Gist options
  • Select an option

  • Save djinn/82c4d1114d7cbfa8b523d620bcc31923 to your computer and use it in GitHub Desktop.

Select an option

Save djinn/82c4d1114d7cbfa8b523d620bcc31923 to your computer and use it in GitHub Desktop.
Graph edge lookup specifically implemented for relation lookup on phone numbers
from json import loads
from random import choice, randint, sample
import timeit
# With this model it is imperative to use the integer form of a phone number instead of a string.
# We use the median of the contact-count distribution from customer data:
# Maximum contacts: 14551
# Minimum: 233
# Median: 393
def create_single_phone_number(s=1000000000, e=9999999999):
    """Return one random integer phone number.

    The number is drawn with ``randint`` from the inclusive range
    ``[s, e]``; the defaults cover every ten-digit integer.
    """
    number = randint(s, e)
    return number
def create_phone_number(n):
    """Lazily generate ``n`` random phone numbers.

    Each value comes from a separate ``create_single_phone_number()`` call,
    so the stream may contain duplicates.
    """
    yield from (create_single_phone_number() for _ in range(n))
def create_contacts(contacts, _min, _max, _median):
    """Build a contact book mapping each number to a random contact list.

    Every entry of ``contacts`` becomes a key whose value is a list of
    ``_median`` entries drawn from ``contacts`` without replacement.

    ``_min`` and ``_max`` are accepted but not used: the model is simplified
    to give every number the median contact count for now.
    """
    # A number may end up in its own contact list. Self-references should
    # really be removed, but leaving them in does not affect performance.
    return {number: sample(contacts, _median) for number in contacts}
def convert_to_long(contact, pair):
    """Pack a (contact, pair) relation into a single integer.

    ``contact`` is shifted left by ten decimal digits and ``pair`` is added
    to it, so two ten-digit numbers fit side by side in one integer.
    """
    ten_digit_shift = 10000000000  # the phone number is ten digits
    shifted = contact * ten_digit_shift
    return shifted + pair
def convert_contacts_to_long(contacts_dict):
    """Yield one packed integer per (owner, contact) pair in the dictionary.

    ``contacts_dict`` maps a number to an iterable of its contacts; every
    pair is encoded with ``convert_to_long(owner, contact)``.
    """
    yield from (
        convert_to_long(owner, friend)
        for owner, friends in contacts_dict.items()
        for friend in friends
    )
def lookup_in_dictionary(contacts_dict, test_sample):
    """Count sample entries whose first contact matches the dictionary's.

    ``test_sample`` is an iterable of ``(number, contacts)`` pairs. A pair
    counts as a hit when ``number`` is a key of ``contacts_dict`` and the
    first contact stored for it equals the first contact in the pair.
    """
    hits = 0
    for number, sample_contacts in test_sample:
        if number not in contacts_dict:
            continue
        if contacts_dict[number][0] == sample_contacts[0]:
            hits += 1
    return hits
def lookup_in_set(contacts_set, test_sample_set):
    """Return how many elements of ``test_sample_set`` are in ``contacts_set``."""
    shared_edges = contacts_set.intersection(test_sample_set)
    return len(shared_edges)
# Build the benchmark fixtures at module level so that the timeit setup
# strings in the __main__ section can import them by name.
l = list(create_phone_number(10000))
print("Created the phone number pool")
# Arguments are the min / max / median contact counts quoted at the top.
contacts_dict = create_contacts(l, 233, 14551, 393)
print("Created the phone contacts datastructure")
# random.sample() needs a sequence: non-sequence populations such as dict
# views are deprecated since Python 3.9 and raise TypeError from 3.11, so
# the items view is copied into a list first.
test_sample = sample(list(contacts_dict.items()), 10)
contacts_set = set(convert_contacts_to_long(contacts_dict))
# Only the first contact of each sampled entry is encoded, mirroring what
# lookup_in_dictionary compares.
test_sample_set = {convert_to_long(k, v[0]) for k, v in test_sample}
if __name__ == '__main__':
    # Each benchmark is a (banner, statement, setup) triple. The banner is
    # printed first, then the time reported by timeit for the statement
    # (run with timeit's default number of executions).
    benchmarks = (
        (
            "Executed using integer dictionary/string dictionary will be slower than this",
            "lookup_in_dictionary(contacts_dict, test_sample)",
            "from __main__ import lookup_in_dictionary, contacts_dict, test_sample",
        ),
        (
            "Executed using set intersection",
            "lookup_in_set(contacts_set, test_sample_set)",
            "from __main__ import lookup_in_set, contacts_set, test_sample_set",
        ),
    )
    for banner, statement, setup_code in benchmarks:
        print(banner)
        print(timeit.timeit(statement, setup=setup_code))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment