Last active
June 12, 2020 01:44
-
-
Save nicbor/54ba55aac1b917bda412cf93f48b0534 to your computer and use it in GitHub Desktop.
From the team at CrowdAI (https://crowdai.com): how to calculate segmentation F1 scores for video sequences.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Union, Tuple, Callable, List, Dict | |
import numpy as np | |
from past.utils import old_div | |
import numpy as np | |
from shapely import ops, wkt, geometry | |
import shapely.geometry as shg | |
from shapely.geometry.base import BaseGeometry as Shape | |
import cv2 | |
def draw_shape(image, shape, annotation_color, default_radius: int = 3):
    """Draw a shapely geometry into a numpy image array, in place.

    Args:
        image: array handed to the OpenCV drawing calls; it is modified in place.
        shape: either a shapely geometry, or a ``(geometry, style)`` tuple where
            ``style`` is a dict. The style keys read here are ``'width'``
            (LineString thickness in pixels) and ``'radius'`` (Point radius).
        annotation_color: color value passed straight through to OpenCV.
        default_radius: radius used for a Point when ``style`` has no ``'radius'``.

    Returns:
        The same ``image`` object that was passed in.

    Raises:
        Exception: if ``shape`` is neither a tuple nor a shapely geometry.
    """
    if isinstance(shape, tuple):
        shape, style = shape
    elif isinstance(shape, Shape):
        style = {}
    else:
        raise Exception(('shape should be either a tuple (Shape, dict) or '
                         'an instance of Shape, found: {}').format(type(shape)))

    # ``geom_type`` is used instead of the deprecated ``type`` attribute.
    kind = shape.geom_type

    if kind == 'LineString':
        width = style.get('width', 1)
        if width < 1:
            print('Found line with px width %s, drawing it as 1' % (width,))
        # Round half up (the original called an undefined ``python2.round``),
        # and never go below a 1 px line.
        line_thickness = max(1, int(np.floor(width + 0.5)))
        x, y = shape.xy
        points = np.asarray(list(zip(x, y)))
        cv2.polylines(
            image,
            np.int32([points]),
            False,
            color=annotation_color,
            thickness=line_thickness,
        )
    elif kind == 'Polygon':
        boundary = shape.boundary
        if boundary.geom_type == 'LineString':
            rings = [boundary]
        elif boundary.geom_type == 'MultiLineString':
            rings = boundary.geoms
        else:
            rings = []
        # NOTE(review): every ring is filled with the same color, so interior
        # rings (holes) end up painted as well -- confirm this is intended.
        for ring in rings:
            x, y = ring.xy
            points = np.asarray(list(zip(x, y)))
            # Not using anti-aliasing as the line type, to avoid the gradient of
            # integer values it would generate along the edges.
            cv2.fillPoly(image, pts=np.int32([points]), color=annotation_color)
    elif kind == 'MultiPolygon':
        # Iterate through ``.geoms``: multi-part geometries are not directly
        # iterable in Shapely 2.
        for part in shape.geoms:
            draw_shape(image, part, annotation_color, default_radius)
    elif kind == 'Point':
        radius = style.get('radius', default_radius)
        # Plain Python ints (truncated like the previous np.int32 cast) for the
        # center coordinates handed to OpenCV.
        center = (int(shape.x), int(shape.y))
        cv2.circle(image, center, radius, annotation_color, thickness=-1)
    # Any other geometry type is left undrawn.
    return image
def draw_shapes(shapes, img_size):
    """Rasterize ``shapes`` onto a fresh zero-filled uint8 array of shape ``img_size``.

    Each shape is drawn via ``draw_shape`` with the color ``(1,)``; the
    resulting array is returned.
    """
    canvas = np.zeros(img_size, dtype=np.uint8)
    fill_value = (1,)
    for geom in shapes:
        draw_shape(canvas, geom, fill_value)
    return canvas
def compute_f1(true_positives: int, false_positives: int, false_negatives: int,
               prec_rec: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Compute the F1 score from raw counts.

    Args:
        true_positives: count of true positives
        false_positives: count of false positives
        false_negatives: count of false negatives
        prec_rec: when True, also return precision and recall

    Returns:
        The F1 score as a float, or the tuple ``(f1, precision, recall)`` when
        ``prec_rec`` is True. All-zero counts yield 0.0 rather than a division error.
    """
    # Convert eps to a Python float. Left as an np.float32 scalar, NumPy 2
    # promotion evaluates ``int + eps`` in float32, which rounds counts above
    # 2**24 and can push precision/recall slightly above 1.
    eps = float(np.finfo(np.float32).eps)

    # eps keeps every denominator non-zero.
    prec = true_positives / float(true_positives + false_positives + eps)
    rec = true_positives / float(true_positives + false_negatives + eps)
    f1 = (2 * prec * rec) / (prec + rec + eps)

    if prec_rec:
        return f1, prec, rec
    return f1
def bin_class_metrics_numpy(y_true: np.array, y_pred: np.array) -> Tuple[int, int, int, int]:
    """Count the four binary-classification outcomes for a pair of arrays.

    Both inputs are clipped to [0, 1] and rounded before counting.

    Returns:
        ``(true_positives, true_negatives, false_positives, false_negatives)``
        as Python ints.
    """
    def _binarize(values):
        # Clip into [0, 1], then round to the nearest integer value.
        return np.round(np.clip(values, 0, 1))

    def _overlap(first, second):
        # Number of positions where both indicator arrays are set.
        return int(np.sum(first * second))

    predicted_pos = _binarize(y_pred)
    predicted_neg = 1 - predicted_pos
    actual_pos = _binarize(y_true)
    actual_neg = 1 - actual_pos

    return (
        _overlap(actual_pos, predicted_pos),
        _overlap(actual_neg, predicted_neg),
        _overlap(actual_neg, predicted_pos),
        _overlap(actual_pos, predicted_neg),
    )
def compute_f1_numpy(y_trues: List[List[np.array]], y_preds: List[List[np.array]]) -> float:
    """Compute one F1 score over nested lists of binary arrays.

    True-positive, false-positive and false-negative counts are summed over
    every frame of every video before a single F1, precision and recall are
    computed and printed.

    Args:
        y_trues: a list of videos, each being a list of frames, each a 2d binary array (dtype=bool)
            representation of "ground truth" labels
        y_preds: a list of videos, each being a list of frames, each a 2d binary array (dtype=bool)
            representation of model-predicted labels. should have same shape as y_trues

    Returns:
        The aggregated F1 score.
    """
    tp_sum = 0
    fp_sum = 0
    fn_sum = 0
    # Iteration is driven by y_trues; predictions are looked up by the same indices.
    for video_idx, truth_frames in enumerate(y_trues):
        for frame_idx, truth_frame in enumerate(truth_frames):
            predicted_frame = y_preds[video_idx][frame_idx]
            tp, _, fp, fn = bin_class_metrics_numpy(truth_frame, predicted_frame)
            tp_sum += tp
            fp_sum += fp
            fn_sum += fn

    f1, precision, recall = compute_f1(tp_sum, fp_sum, fn_sum, prec_rec=True)
    print(f'F1 score {f1}')
    print(f'Precision score {precision}')
    print(f'Recall score {recall}')
    return f1
"""Simulate a dummy case with 3 videos (each video is 150 frames) | |
where labels all postivives and 75% of pixels are predicted to be positive | |
F1 should be ~.85, Precision 1, and Recall .75 | |
""" | |
print('Simulating scoring a collection of videos . . .') | |
preds = [np.random.random((150, 728, 1028)) > .25 for _ in range(3)] | |
labels = [np.ones((150, 728, 1028), dtype=bool) for _ in range(3)] | |
compute_f1_numpy(labels, preds) | |
"""Simulate a dummy case for scoring a list of 4 images instead of videos | |
where labels all postivives and 75% of pixels are predicted to be positive | |
F1 should be ~.85, Precision 1, and Recall .75. note the main difference is | |
removing the first dimension of our arrays. | |
""" | |
print('Simulating scoring a collection of images . . .') | |
preds = [np.random.random((728, 1028)) > .25 for _ in range(4)] | |
labels = [np.ones((728, 1028), dtype=bool) for _ in range(4)] | |
compute_f1_numpy(labels, preds) | |
""" make a dummy triangle as ground truth. frame_size is the size of the whole frame """ | |
print('Simulating scoring a polygon . . .') | |
label = geometry.Polygon([[100, 100], [200, 100], [200, 150]]) | |
frame_size = (256, 256) | |
label_npmask = draw_shapes([label], frame_size) | |
""" As offset goes to 0, F1 should go to 1 """ | |
offset = 25 | |
pred = geometry.Polygon([[100 + offset, 100 + offset], [200 + offset, 100 + offset], [200 + offset, 150 + offset]]) | |
frame_size = (256, 256) | |
pred_npmask = draw_shapes([pred], frame_size) | |
compute_f1_numpy([label_npmask], [pred_npmask]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment