Skip to content

Instantly share code, notes, and snippets.

@hughsando
Created March 25, 2019 03:04
Show Gist options
  • Save hughsando/5612f4b0b645f9e645ab5ac0e99946b9 to your computer and use it in GitHub Desktop.
Save hughsando/5612f4b0b645f9e645ab5ac0e99946b9 to your computer and use it in GitHub Desktop.
# Length of the diagonal of a 25000 x 25000 square. score_mine divides the
# distance between two mines by this value to normalise it.
MAX_DISTANCE = sqrt(2 * 25000.0 ** 2)
def score_mine_commodity_label(predicted, actual):
    """
    Score the predictions for commodity label against the actual values.

    Both collections are de-duplicated before they are compared, so a label
    that is repeated counts only once.

    Parameters:
        predicted - sized collection of predicted commodity labels (hashable)
        actual - sized collection of actual commodity labels (hashable)

    Returns:
        1.0 if both collections are empty, 0.0 if exactly one of them is
        empty, otherwise
            0.5 + (right - 0.5 * missed - wrong) / (2 * total)
        with total = right + wrong + missed. Identical label sets give 1.0;
        a missed label costs half as much as a wrong one.
    """
    # Handle zero deposit case. len() is used rather than truthiness so that
    # any sized container (e.g. a numpy array) is accepted.
    if len(actual) == 0:
        return 1.0 if len(predicted) == 0 else 0.0
    if len(predicted) == 0:
        return 0.0

    pred_commodities = set(predicted)
    actual_commodities = set(actual)

    # Work out how many right, missed and wrong
    right = len(pred_commodities & actual_commodities)
    wrong = len(pred_commodities - actual_commodities)
    missed = len(actual_commodities - pred_commodities)

    # Both sets are non-empty here, so total is at least 1.
    total = right + wrong + missed
    return 0.5 + ((right - 0.5 * missed - wrong) / (2 * total))
def score_mine(mine0, mine1):
    """
    Score one predicted mine against one actual mine.

    Parameters:
        mine0 - predicted mine, indexable as (x, y, commodity)
        mine1 - actual mine, indexable as (x, y, commodity)

    The commodity entry is a string of labels separated by ';'.

    Returns:
        0.75 * commodity score + 0.25 * distance score, where the commodity
        score comes from score_mine_commodity_label and the distance score is
        1 - (euclidean distance between the two x/y pairs) / MAX_DISTANCE.
        The distance score goes negative if the mines are further apart than
        MAX_DISTANCE.
    """
    # Cast explicitly to float so that rows taken from an object-dtype array
    # (x, y stored next to a string), plain lists and tuples all behave alike.
    xy0 = np.asarray(mine0[:2], dtype=float)
    xy1 = np.asarray(mine1[:2], dtype=float)
    dist_score = 1 - np.sqrt(np.square(xy0 - xy1).sum()) / MAX_DISTANCE

    pred_commodities = mine0[2].split(";")
    actual_commodities = mine1[2].split(";")
    comm_score = score_mine_commodity_label(pred_commodities, actual_commodities)

    return 0.75 * comm_score + 0.25 * dist_score
def score_stamp_matched(pred_xyc, actual_xyc):
    """
    Score a set of predicted mines against a set of actual mines.

    Matching is greedy: each prediction, in the order given, is paired with
    the not-yet-used actual mine that gives the highest score_mine value
    (a candidate must score above -1 to be paired). Every actual mine is used
    at most once.

    Parameters:
        pred_xyc - array with one (x, y, commodity) row per predicted mine
        actual_xyc - array with one (x, y, commodity) row per actual mine

    Returns:
        1.0 if both arrays have no rows, 0.0 if exactly one has no rows,
        otherwise the sum of the paired scores divided by the larger of the
        two row counts. Rows left unpaired on either side contribute nothing
        to the sum, so they lower the result only through the divisor.
    """
    total_count = max(pred_xyc.shape[0], actual_xyc.shape[0])
    # Got 0/0 = perfect score
    if total_count == 0:
        return 1.0

    common_count = min(pred_xyc.shape[0], actual_xyc.shape[0])
    # nothing can match = worst score
    if common_count == 0:
        return 0.0

    total_score = 0.0
    actual_used = np.zeros(actual_xyc.shape[0], dtype=bool)
    for pred in pred_xyc:
        best_score = -1
        best_id = -1
        for a_id, act in enumerate(actual_xyc):
            if actual_used[a_id]:
                continue
            score = score_mine(pred, act)
            if score > best_score:
                best_score = score
                best_id = a_id

        if best_id < 0:
            # No actual mine was available for this prediction. Do not let
            # the -1 sentinels leak into the total or index actual_used[-1];
            # the surplus prediction is already penalised via total_count.
            continue

        total_score += best_score
        actual_used[best_id] = True

    return total_score / total_count
def score_stamp_matched_dataframe(predicted, actual):
    """
    Score predicted mines against actual mines given as data frames.

    Parameters:
        predicted - frame of predicted mines with 'x', 'y', 'commodity' columns
        actual - frame of actual mines with 'x', 'y', 'commodity' columns

    Returns:
        The result of score_stamp_matched on the two frames' x/y/commodity
        values.
    """
    columns = ['x', 'y', 'commodity']
    pred_xyc = predicted[columns].values
    actual_xyc = actual[columns].values
    return score_stamp_matched(pred_xyc=pred_xyc, actual_xyc=actual_xyc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment