Created
December 19, 2019 01:30
-
-
Save djinn/82c4d1114d7cbfa8b523d620bcc31923 to your computer and use it in GitHub Desktop.
Graph edge lookup specifically implemented for relation lookup on phone numbers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from json import loads | |
| from random import choice, randint, sample | |
| import timeit | |
| # With this model it is imperative to use integer form of phone number instead of string | |
| # We will use median contact frequency distribution from customer data | |
| # Maximum contacts: 14551 | |
| # Minimum: 233 | |
| # Median: 393 | |
def create_single_phone_number(s=1000000000, e=9999999999):
    """Return one random phone number as an integer.

    The number is drawn with ``randint`` from the inclusive range ``[s, e]``;
    the defaults cover every ten-digit value from 1000000000 to 9999999999.
    """
    number = randint(s, e)
    return number
def create_phone_number(n):
    """Lazily yield ``n`` random phone numbers.

    Each value comes from ``create_single_phone_number()`` called with its
    default bounds. Duplicates are possible because every draw is independent.
    """
    for _ in range(n):
        number = create_single_phone_number()
        yield number
def create_contacts(contacts, _min, _max, _median):
    """Build a mapping of each phone number to a random contact list.

    Every entry of ``contacts`` becomes a key whose value is a list of
    ``_median`` numbers drawn without replacement from the same pool via
    ``random.sample``.

    ``_min`` and ``_max`` are accepted but currently ignored: the model is
    simplified to use only the median contact count.

    A number may appear in its own contact list; self-references are not
    filtered out because they do not affect the lookup benchmark.
    """
    return {owner: sample(contacts, _median) for owner in contacts}
def convert_to_long(contact, pair):
    """Combine two phone numbers into a single integer edge key.

    ``contact`` is shifted left by ten decimal digits and ``pair`` is added,
    so for ten-digit inputs the result reads as the two numbers written back
    to back.
    """
    shift = 10 ** 10  # the phone number is ten digits
    return contact * shift + pair
def convert_contacts_to_long(contacts_dict):
    """Yield one combined integer key per (owner, contact) pair.

    Walks every key of ``contacts_dict`` and every number in that key's
    contact list, encoding each pair with ``convert_to_long(owner, contact)``.
    """
    for owner in contacts_dict:
        for peer in contacts_dict[owner]:
            yield convert_to_long(owner, peer)
def lookup_in_dictionary(contacts_dict, test_sample):
    """Count sample entries confirmed by a dictionary lookup.

    ``test_sample`` is an iterable of ``(number, contact_list)`` pairs. An
    entry counts as a hit when ``number`` is a key of ``contacts_dict`` and
    the first contact stored under that key equals the first element of the
    entry's own ``contact_list``.

    Returns the number of hits.
    """
    hits = 0
    for owner, peers in test_sample:
        if owner not in contacts_dict:
            # Unknown number: nothing to compare against.
            continue
        if contacts_dict[owner][0] == peers[0]:
            hits += 1
    return hits
def lookup_in_set(contacts_set, test_sample_set):
    """Count the combined edge keys present in both collections.

    Returns the size of ``contacts_set.intersection(test_sample_set)``.
    """
    common = contacts_set.intersection(test_sample_set)
    return len(common)
# Benchmark fixtures are built at module level so that the timeit setup
# strings below can import them from __main__.
l = list(create_phone_number(10000))
print("Created the phone number pool")
contacts_dict = create_contacts(l, 233, 14551, 393)
print("Created the phone contacts datastructure")
# random.sample() requires a sequence. Passing the dict items view directly
# was deprecated in Python 3.9 and raises TypeError from 3.11 on, so the
# items are materialized into a list first.
test_sample = sample(list(contacts_dict.items()), 10)
# Every (owner, contact) edge encoded as one integer, for set-based lookup.
contacts_set = set(convert_contacts_to_long(contacts_dict))
# Only the first contact of each sampled owner is probed, mirroring what
# lookup_in_dictionary compares.
test_sample_set = {convert_to_long(k, v[0]) for k, v in test_sample}

if __name__ == '__main__':
    print("Executed using integer dictionary/string dictionary will be slower than this")
    print(timeit.timeit("lookup_in_dictionary(contacts_dict, test_sample)", setup="from __main__ import lookup_in_dictionary, contacts_dict, test_sample"))
    print("Executed using set intersection")
    print(timeit.timeit("lookup_in_set(contacts_set, test_sample_set)", setup="from __main__ import lookup_in_set, contacts_set, test_sample_set"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment