Last active
May 2, 2018 01:13
-
-
Save heartonbit/e751d767595bc82e938bc4e9888ea1f6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def cosine_distances(X, Y):
    """
    Compute the pair-wise cosine distance between the rows of X and Y.

    X : Target example score vector DataFrame with inst_id as the first column
    Y : All example score vector DataFrame with inst_id as the first column

    Returns a DataFrame with one row per X example and one column per Y
    example. Rows are labelled with the X inst_id values, columns with the
    Y inst_id values, and each cell holds the cosine distance between the
    two corresponding score vectors.
    """
    # Imported locally so the function is self-contained and does not rely
    # on module-level imports being present.
    import pandas as pd
    from sklearn.metrics import pairwise

    # The first column carries the identifiers; everything after it is the
    # numeric score vector.
    x_header = X.iloc[:, 0].values
    y_header = Y.iloc[:, 0].values
    x_scores = X.iloc[:, 1:]
    y_scores = Y.iloc[:, 1:]

    d_mat = pairwise.cosine_distances(x_scores, y_scores)
    return pd.DataFrame(d_mat, index=x_header, columns=y_header)
def validate(X, Y, d_mat_df):
    """
    Validate cosine distances function

    Recomputes the cosine distance of every (X row, Y row) pair directly
    from the score vectors and compares it with the matching cell of
    d_mat_df.

    X : Bait DataFrame with inst_id as the first column
    Y : All DataFrame with inst_id as the first column
    d_mat_df : result DataFrame from cosine_distances function
               (rows labelled by X inst_id, columns labelled by Y inst_id)

    Returns None. Raises AssertionError for the first pair whose two
    values are not within 0.00001 of each other.
    """
    import numpy as np

    # First column is the identifier; the remaining columns are the scores.
    x_ids = X.iloc[:, 0].values
    y_ids = Y.iloc[:, 0].values
    x_vecs = X.iloc[:, 1:].values.astype(float)
    y_vecs = Y.iloc[:, 1:].values.astype(float)

    # Norms are loop-invariant, so compute them once per row up front.
    x_norms = np.linalg.norm(x_vecs, axis=1)
    y_norms = np.linalg.norm(y_vecs, axis=1)

    for x_inst, x_vec, x_norm in zip(x_ids, x_vecs, x_norms):
        for y_inst, y_vec, y_norm in zip(y_ids, y_vecs, y_norms):
            scale = x_norm * y_norm
            if scale == 0:
                # A zero vector has no direction; treat its similarity as 0
                # (distance 1) rather than dividing by zero.
                # NOTE(review): believed to match scikit-learn's handling of
                # zero-norm rows -- confirm against the installed version.
                dist1 = 1.0
            else:
                dist1 = 1.0 - float(np.dot(x_vec, y_vec)) / scale

            # Explicit row/column label lookup (row = X id, column = Y id).
            dist2 = d_mat_df.loc[x_inst, y_inst]

            # "not (... < tol)" also trips on NaN, like the assert it replaces.
            # Raised explicitly so the check is not stripped under "python -O".
            if not abs(dist1 - dist2) < 0.00001:
                raise AssertionError(
                    "Something is wrong. {0} is NOT equal with {1}".format(dist1, dist2)
                )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment