Created
March 25, 2019 03:04
-
-
Save hughsando/5612f4b0b645f9e645ab5ac0e99946b9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import sqrt

import numpy as np

# Normalising distance for the location score: the diagonal of a
# 25000 x 25000 square. Two mines this far apart get a distance score of 0.
# NOTE(review): presumably 25000 is the side length of a stamp in the
# coordinate units of the x/y columns -- confirm against the data source.
MAX_DISTANCE = sqrt(25000.0 ** 2 + 25000.0 ** 2)
def score_mine_commodity_label(predicted, actual):
    """
    Score the predicted commodity labels against the actual labels.

    Both inputs are reduced to sets, so duplicate labels and ordering are
    ignored.

    Parameters:
        predicted - sized iterable of predicted commodity labels (hashable)
        actual - sized iterable of actual commodity labels (hashable)

    Returns:
        A score between 0.0 and 1.0. Identical label sets (including two
        empty inputs) score 1.0; exactly one empty input scores 0.0.
        Otherwise the score is
        0.5 + (right - 0.5 * missed - wrong) / (2 * total),
        so a wrong label costs twice as much as a missed one.
    """
    # Handle the zero-deposit cases. len() is used rather than truthiness
    # so that array-like inputs (e.g. NumPy arrays) keep working.
    if len(predicted) == 0:
        return 1.0 if len(actual) == 0 else 0.0
    if len(actual) == 0:
        return 0.0

    pred_commodities = set(predicted)
    actual_commodities = set(actual)

    # Work out how many right, missed and wrong
    right = len(pred_commodities & actual_commodities)
    wrong = len(pred_commodities - actual_commodities)
    missed = len(actual_commodities - pred_commodities)

    # Both sets are non-empty here, so total >= 1 and the division is safe.
    total = right + wrong + missed
    return 0.5 + ((right - 0.5 * missed - wrong) / (2 * total))
def score_mine(mine0, mine1):
    """
    Score one predicted mine against one actual mine.

    Parameters:
        mine0 - predicted mine, indexable as (x, y, commodity), where
                commodity is a string of labels separated by ";"
        mine1 - actual mine, same layout as mine0

    Returns:
        0.75 * commodity score + 0.25 * distance score. The distance score
        is 1 at zero separation and falls linearly to 0 at MAX_DISTANCE;
        it is not clamped, so it is negative beyond MAX_DISTANCE.
    """
    # Coerce the coordinates to float arrays so that plain sequences and
    # object-dtype rows (e.g. taken from a mixed-type table) subtract
    # element-wise, and integer coordinates cannot overflow when squared.
    xy0 = np.asarray(mine0[:2], dtype=float)
    xy1 = np.asarray(mine1[:2], dtype=float)
    distance = np.sqrt(np.square(xy0 - xy1).sum())
    dist_score = 1 - distance / MAX_DISTANCE

    pred_commodities = mine0[2].split(";")
    actual_commodities = mine1[2].split(";")
    comm_score = score_mine_commodity_label(pred_commodities, actual_commodities)

    return 0.75 * comm_score + 0.25 * dist_score
def score_stamp_matched(pred_xyc, actual_xyc):
    """
    Score a set of predicted mines against the actual mines of one stamp.

    Each prediction, in order, is greedily paired with the best-scoring
    actual mine that has not been paired yet (scored by score_mine). The
    summed pair scores are divided by max(#predicted, #actual), so both
    surplus predictions and unmatched actual mines lower the result.

    Parameters:
        pred_xyc - 2-D array with one predicted mine per row; rows are
                   passed to score_mine as (x, y, commodity)
        actual_xyc - 2-D array of actual mines, same layout

    Returns:
        1.0 when both inputs are empty, 0.0 when exactly one is empty,
        otherwise the sum of matched pair scores over the larger count.
    """
    n_pred = pred_xyc.shape[0]
    n_actual = actual_xyc.shape[0]

    total_count = max(n_pred, n_actual)
    # Got 0/0 = perfect score
    if total_count == 0:
        return 1.0

    # nothing can match = worst score
    if min(n_pred, n_actual) == 0:
        return 0.0

    total_score = 0.0
    actual_used = np.zeros(n_actual, dtype=bool)
    for pred in pred_xyc:
        best_score = None
        best_id = None
        for a_id, act in enumerate(actual_xyc):
            if actual_used[a_id]:
                continue
            score = score_mine(pred, act)
            if best_score is None or score > best_score:
                best_score = score
                best_id = a_id

        if best_id is None:
            # Every actual mine is already paired. Surplus predictions add
            # nothing; they are penalised only through total_count. (Adding
            # a sentinel score here would subtract from the total.)
            break

        total_score += best_score
        actual_used[best_id] = True

    return total_score / total_count
def score_stamp_matched_dataframe(predicted, actual):
    """
    Score predicted against actual mines given as tables.

    Extracts the 'x', 'y' and 'commodity' columns of each input as an
    array via `.values` and delegates to score_stamp_matched.

    Parameters:
        predicted, actual - tables supporting `obj[[...]].values` with
            columns 'x', 'y' and 'commodity' (presumably pandas
            DataFrames, going by the function name)

    Returns:
        The value returned by score_stamp_matched for the two arrays.
    """
    columns = ['x', 'y', 'commodity']
    return score_stamp_matched(
        pred_xyc=predicted[columns].values,
        actual_xyc=actual[columns].values,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment