import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm import tqdm_notebook
from sklearn.model_selection import train_test_split

# FP16 (the half-precision model wrapper used at training time) and LABELS
# (path to the train labels CSV with Image/Id columns) are expected to be
# defined by the surrounding notebook.


def re_ranking(model, probFea, galFea, k1, k2, lambda_value):
    # k-reciprocal re-ranking (Zhong et al.), with the original pairwise
    # distances computed on the GPU via the model's learned metric (get_d).
    query_num = probFea.shape[0]
    all_num = query_num + galFea.shape[0]
    feat = np.append(probFea, galFea, axis=0)
    feat = feat.astype(np.float16)
    feat = torch.from_numpy(feat).half().cuda()
    print('computing original distance')
    sz = feat.shape[0]
    d = []
    model.eval()
    with torch.no_grad():
        # unwrap the FP16 / DataParallel wrappers to reach the base network
        m = model.module if isinstance(model, FP16) else model
        m = m.module if isinstance(m, nn.DataParallel) else m
        for i in tqdm_notebook(range(sz)):
            # get_d returns the model's distance from feat[i] to every row of feat
            preds = m.get_d(feat[i], feat)
            d.append(preds)
    original_dist = np.stack(d)
    del feat
    gallery_num = original_dist.shape[0]
    original_dist = np.transpose(original_dist / np.max(original_dist, axis=0))
    V = np.zeros_like(original_dist).astype(np.float16)
    initial_rank = np.argsort(original_dist).astype(np.int32)

    print('starting re_ranking')
    for i in range(all_num):
        # k-reciprocal neighbors: i's top-k1 neighbors that also rank i
        # among their own top-k1 neighbors
        forward_k_neigh_index = initial_rank[i, :k1 + 1]
        backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
        fi = np.where(backward_k_neigh_index == i)[0]
        k_reciprocal_index = forward_k_neigh_index[fi]
        k_reciprocal_expansion_index = k_reciprocal_index
        for j in range(len(k_reciprocal_index)):
            candidate = k_reciprocal_index[j]
            candidate_forward_k_neigh_index = initial_rank[candidate, :int(np.around(k1 / 2)) + 1]
            candidate_backward_k_neigh_index = initial_rank[candidate_forward_k_neigh_index, :int(np.around(k1 / 2)) + 1]
            fi_candidate = np.where(candidate_backward_k_neigh_index == candidate)[0]
            candidate_k_reciprocal_index = candidate_forward_k_neigh_index[fi_candidate]
            if len(np.intersect1d(candidate_k_reciprocal_index, k_reciprocal_index)) > (2 / 3) * len(candidate_k_reciprocal_index):
                k_reciprocal_expansion_index = np.append(k_reciprocal_expansion_index, candidate_k_reciprocal_index)
        k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
        weight = np.exp(-original_dist[i, k_reciprocal_expansion_index])
        V[i, k_reciprocal_expansion_index] = weight / np.sum(weight)

    original_dist = original_dist[:query_num, ]
    if k2 != 1:
        # local query expansion: average each row of V over its k2 nearest neighbors
        V_qe = np.zeros_like(V, dtype=np.float16)
        for i in range(all_num):
            V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0)
        V = V_qe
        del V_qe
    del initial_rank

    # inverted index: for each gallery image, the rows with a non-zero weight on it
    invIndex = []
    for i in range(gallery_num):
        invIndex.append(np.where(V[:, i] != 0)[0])

    # Jaccard distance between the weighted neighbor sets
    jaccard_dist = np.zeros_like(original_dist, dtype=np.float16)
    for i in range(query_num):
        temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float16)
        indNonZero = np.where(V[i, :] != 0)[0]
        indImages = [invIndex[ind] for ind in indNonZero]
        for j in range(len(indNonZero)):
            temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
        jaccard_dist[i] = 1 - temp_min / (2 - temp_min)

    # blend the Jaccard distance with the original distance
    final_dist = jaccard_dist * (1 - lambda_value) + original_dist * lambda_value
    del original_dist
    del V
    del jaccard_dist
    final_dist = final_dist[:query_num, query_num:]
    print('re-rank done..')
    return final_dist
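
# A minimal, hypothetical usage sketch (not part of the original gist): it
# assumes `model` is the trained network whose unwrapped module exposes get_d,
# and that emb_dim matches the model's embedding size. Smaller entries in the
# returned distance matrix mean more similar pairs.
def _demo_re_ranking(model, emb_dim=512, n_query=8, n_gallery=64):
    query_emb = np.random.rand(n_query, emb_dim).astype(np.float32)
    gallery_emb = np.random.rand(n_gallery, emb_dim).astype(np.float32)
    dist = re_ranking(model, query_emb, gallery_emb, k1=16, k2=3, lambda_value=0.3)
    return dist.argmin(axis=1)  # index of the best gallery match for each query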
def get_val_reciprocal_nbs(model, emb_df, out='val.csv', dcut=None, k=16):
    # Re-rank validation embeddings against the training embeddings and write
    # the k nearest training neighbors (and their distances) per validation image.
    emb_df = emb_df.copy()
    data = pd.read_csv(LABELS).set_index('Image')
    emb_df['emb'] = [[float(i) for i in s.split()] for s in emb_df['emb']]
    emb_df.set_index('files', inplace=True)
    train_df = data.join(emb_df)
    train_df = train_df.reset_index()
    # the split should be the same as the one used for training
    trn_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)
    trn_preds = np.array(trn_df.emb.tolist())
    val_preds = np.array(val_df.emb.tolist())
    trn_df = trn_df.reset_index()
    val_df = val_df.reset_index()
    # re_ranking expects NumPy arrays and moves them to the GPU itself
    trn_preds = trn_preds.astype(np.float16)
    val_preds = val_preds.astype(np.float16)
    ds = re_ranking(model, val_preds, trn_preds, k1=16, k2=3, lambda_value=0.3)
    argsorted = ds.argsort()
    trn_idxs = argsorted[:, :k]
    trn_d = np.vstack([ds[i, trn_idxs[i, :]] for i in range(trn_idxs.shape[0])])
    s = []
    for l1 in trn_d.tolist():
        s.append(' '.join([str(l2) for l2 in l1]))
    val_df['d'] = s
    val_df['nbs'] = [' '.join(trn_df.loc[trn_idxs[index]].Id.tolist())
                     for index, row in val_df.iterrows()]
    val_df[['Image', 'Id', 'nbs', 'd']].to_csv(out, header=True, index=False)
    if dcut is not None:
        # MAP@5 on the validation split: walk the neighbors in order of distance,
        # inserting 'new_whale' as soon as the distance exceeds the cutoff dcut
        scores = []
        for idx in val_df.index:
            l0 = val_df.loc[idx].Id
            nbs = dict()
            for i in range(k):  # up to k neighbors
                nb = trn_idxs[idx, i]
                l, s = trn_df.loc[nb].Id, trn_d[idx, i]
                if s > dcut and 'new_whale' not in nbs: nbs['new_whale'] = dcut
                if l not in nbs: nbs[l] = s
                if len(nbs) >= 5: break
            nbs_sorted = list(nbs.items())
            score = 0.0
            for i in range(min(len(nbs), 5)):
                if nbs_sorted[i][0] == l0:
                    score = 1.0 / (i + 1.0)
                    break
            scores.append(score)
        print(np.array(scores).mean(), flush=True)
    return
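
# Hypothetical usage sketch (not part of the original gist). It assumes the
# notebook defines LABELS (the train.csv with Image/Id columns) and a trained
# `learner`, and that emb_path points to a CSV with a 'files' column of image
# names plus an 'emb' column of space-separated floats, e.g. saved by a
# separate inference step.
def _demo_validation_scoring(emb_path='train_emb.csv', dcut=1.0):
    emb_df = pd.read_csv(emb_path)
    # writes the 16 nearest training neighbors per validation image to val.csv
    # and prints the MAP@5 score for the chosen distance cutoff
    get_val_reciprocal_nbs(learner.model, emb_df, out='val.csv', dcut=dcut, k=16)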