Skip to content

Instantly share code, notes, and snippets.

@brydavis
Created April 14, 2016 23:20
Show Gist options
  • Select an option

  • Save brydavis/c7ba7c961a2fee78042b7917cfe7be95 to your computer and use it in GitHub Desktop.

Select an option

Save brydavis/c7ba7c961a2fee78042b7917cfe7be95 to your computer and use it in GitHub Desktop.
# coding: utf-8
# In[70]:
import re
# Sample SSN pairs for exercising ssn(). The trailing number on each row is
# the score ssn() returns for that pair (count of matching trailing digits).
ssn_data = [
    ["000115464", "000135464"],      # 4
    ["763415464", "165415464"],      # 6
    ["763-41-5464", "165-41-5464"],  # 6
    ["-01-5464", "2-41-5464"],       # 5
    ["469879875", "469879870"],      # 0
    ["783-65-6374", "783656374"],    # 9
    ["115464", "5464"],              # 4
]
def ssn(a, b):
    """Score how many trailing digits two SSN strings share.

    Each input is reversed, stripped of every non-digit character and
    right-padded with "0" to nine characters. The two normalised strings
    are then compared position by position, starting from what was the
    last digit of the original, and counting stops at the first mismatch.

    Args:
        a: First SSN, with or without separators (e.g. "763-41-5464").
        b: Second SSN, same format rules.

    Returns:
        An int from 0 to 9: the length of the common run of digits counted
        from the end of both numbers. Padding zeros take part in the
        comparison, so two inputs that are both short can also match on
        their padding.
    """
    def normalize(raw):
        # Reverse first so index 0 is the last digit of the original.
        digits = re.sub(r"[^0-9]", "", raw[::-1])
        # ljust never truncates, so inputs longer than nine digits pass
        # through unchanged and only their first nine are compared below.
        return digits.ljust(9, "0")

    left = normalize(a)
    right = normalize(b)

    score = 0
    for x, y in zip(left[:9], right[:9]):
        # Value comparison: `is` on characters only works by accident of
        # the interpreter caching one-character strings.
        if x != y:
            break
        score += 1
    return score
# In[71]:
# Print the ssn() score for every sample pair, followed by the pair itself.
for pair in ssn_data:
    print(ssn(pair[0], pair[1]), pair, sep=" \t ")
# In[97]:
import time
# Sample date-of-birth pairs for exercising dob(). The trailing number on
# each row is the score dob() returns for that pair.
dob_data = [
    ["1985-06-07", "1985-06-07"],  # 6
    ["1985-06-07", "1985-6-7"],    # 6
    ["1988-06-07", "1985-06-07"],  # 3
    ["1985-01-07", "1985-06-07"],  # 4
]
def dob(a, b):
    """Score the similarity of two dates given as "YYYY-MM-DD" strings.

    The score is the sum of three parts:
      * year:  3 minus the absolute difference in years (3 when equal;
               this part is not clamped, so it goes negative once the
               years are more than three apart),
      * month: 2 when the months are equal, otherwise 0,
      * day:   1 when the days of the month are equal, otherwise 0.

    Args:
        a: First date string, parsed with the format "%Y-%m-%d".
        b: Second date string, same format.

    Returns:
        The int score; 6 for identical dates.

    Raises:
        ValueError: from time.strptime when either string does not parse.
    """
    first = time.strptime(a, "%Y-%m-%d")
    second = time.strptime(b, "%Y-%m-%d")

    # 3 - |year gap| already yields 3 for equal years, so no special case.
    total = 3 - abs(first.tm_year - second.tm_year)
    if first.tm_mon == second.tm_mon:
        total += 2
    if first.tm_mday == second.tm_mday:
        total += 1
    return total
# In[98]:
# Print the dob() score for every sample pair, followed by the pair itself.
for pair in dob_data:
    print(dob(pair[0], pair[1]), pair, sep=" \t ")
# In[102]:
# a = "783656374"
# # print(a[1::-1])
# abs(sum([-1*(int(x)+2) for x in a]))
# In[199]:
import re
from collections import defaultdict
# Sample name pairs for exercising name(). The trailing number on each row
# looks like the expected score.
# NOTE(review): the 0s on the last four rows do not match what name()
# returns for those pairs (3, 13, 10 and 7) -- confirm whether they are
# placeholders.
name_data = [
    ["Bryan", "BrYaN"],                    # 5
    ["ChristoPHER", "CHRIST opher"],       # 11
    ["lucia", "LUCIA"],                    # 5
    ["O'Brien", "O'Brien"],                # 6
    ["Davis", "Davidson"],                 # 4
    ["Julia", "Julie"],                    # 4
    ["Amy Jo", "Jo"],                      # 2
    ["Amy Jo", "Amy"],                     # 0
    ["Michaels-Smith", "Michaels Smith"],  # 0
    ["Jones-Smith", "JonesSmith"],         # 0
    ["Billy Jo", "Billy Jo"],              # 0
]
# Running tally of how often each cleaned single-word name has been scored.
name_counter = defaultdict(int)

# Matches every character that is not an ASCII letter. The range is spelled
# out as A-Za-z because "A-z" also spans the punctuation between "Z" and
# "a" ([ \ ] ^ _ `), which would then survive cleaning.
_NON_LETTERS = re.compile(r"[^A-Za-z]")


def _clean_name(raw):
    """Return *raw* lower-cased with every non-letter character removed."""
    return _NON_LETTERS.sub("", raw).lower()


def _shared_prefix_len(left, right):
    """Return how many leading characters *left* and *right* have in common."""
    count = 0
    for x, y in zip(left, right):
        # Value comparison; `is` on characters relies on interpreter caching.
        if x != y:
            break
        count += 1
    return count


def name(a, b):
    """Score two names by the length of their shared leading letters.

    Names are compared case-insensitively after dropping every character
    that is not an ASCII letter.

    * If neither name contains a space, the score is the length of the
      common prefix of the two cleaned names. Both cleaned names are also
      tallied in ``name_counter`` and a commonality ratio (the larger of
      the two tallies divided by the sum of all tallies) is printed.
    * If either name contains a space, every space-separated part of *a*
      is scored against every part of *b* (via this same function, so the
      parts are tallied and their ratios printed), the two whole cleaned
      names are scored as well, and the highest of those scores is
      returned.

    Args:
        a: First name.
        b: Second name.

    Returns:
        The int score; 0 when the cleaned names differ in their first
        letter or either one is empty.
    """
    a_parts = a.split(" ")
    b_parts = b.split(" ")

    if len(a_parts) > 1 or len(b_parts) > 1:
        # Parts never contain a space, so this recursion is one level deep.
        scores = [name(p, q) for p in a_parts for q in b_parts]
        scores.append(_shared_prefix_len(_clean_name(a), _clean_name(b)))
        return max(scores)

    a = _clean_name(a)
    b = _clean_name(b)

    # .get() instead of += on a possibly missing key, so the tally keeps
    # working even if name_counter has been replaced by a plain dict.
    name_counter[a] = name_counter.get(a, 0) + 1
    name_counter[b] = name_counter.get(b, 0) + 1

    # Diagnostic output only; the ratio does not influence the score.
    commonality = max(name_counter[a], name_counter[b]) / sum(name_counter.values())
    print(commonality)

    return _shared_prefix_len(a, b)
# In[200]:
# Print the name() score for every sample pair, followed by the pair itself.
for pair in name_data:
    print(name(pair[0], pair[1]), pair, sep=" \t ")

# Snapshot the tally as a plain dict and drop the entry for the empty name.
# NOTE(review): this rebinds name_counter, and the values end up as
# frequencies, so it no longer holds the raw counts name() maintains --
# re-create the counter before scoring more names.
name_counter = dict(name_counter)
name_counter.pop("", None)
print(name_counter)

# Convert each count into its share of all counted names, to two decimals.
total_names = sum(name_counter.values())
name_counter = {
    counted: round(count / total_names, 2)
    for counted, count in name_counter.items()
}
print(name_counter)
# In[192]:
# Scratch cell: rounds 45.34545345 to two decimal places (45.35); the value
# is not assigned to anything.
round(45.34545345,2)
# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment