Last active
November 7, 2021 12:46
-
-
Save ibaiGorordo/208635f2bb8a4ac54f21914cd492c8bd to your computer and use it in GitHub Desktop.
Python script to analyze the input image transformation of the Mediapipe face detection model.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# References:
# - Resize and pad: https://stackoverflow.com/questions/44720580/resize-image-canvas-to-maintain-square-aspect-ratio-in-python-opencv
# - Mediapipe image scale and offset: https://github.com/google/mediapipe/blob/ecb5b5f44ab23ea620ef97a479407c699e424aa7/mediapipe/calculators/tensor/image_to_tensor_utils.cc#L79
# - Mediapipe OpenCV image resize: https://github.com/google/mediapipe/blob/ecb5b5f44ab23ea620ef97a479407c699e424aa7/mediapipe/framework/deps/image_resizer.h#L27
import mediapipe as mp | |
from mediapipe.python import solution_base | |
import cv2 | |
import numpy as np | |
import matplotlib.pyplot as plt | |
def pad_resize(img, new_shape):
    """Letterbox ``img`` into ``new_shape`` while keeping its aspect ratio.

    The image is scaled so that it fits inside the target canvas and is
    centred along the axis that needs padding. The mapping is built as a
    perspective transform between the corner pixels of the source image and
    the corner pixels of the scaled region inside the canvas.

    Args:
        img: Image array whose first two axes are (height, width). A trailing
            channel axis is optional.
        new_shape: Target size as ``(width, height)``.

    Returns:
        Tuple ``(out_img, M)``: the warped image produced by
        ``cv2.warpPerspective`` with output size ``new_shape``, and the matrix
        from ``cv2.getPerspectiveTransform`` mapping original pixel
        coordinates to output pixel coordinates.
    """
    # Only height and width are needed, so 2-D (grayscale) input works too.
    img_height, img_width = img.shape[:2]
    new_width, new_height = new_shape

    img_aspect_ratio = img_height / img_width
    new_aspect_ratio = new_height / new_width

    # Start from "image fills the whole canvas" and shrink one side below.
    ver_pad_half = 0
    hor_pad_half = 0
    resize_height = new_height
    resize_width = new_width

    if new_aspect_ratio > img_aspect_ratio:
        # Canvas is relatively taller than the image: fit the width and pad
        # above/below.
        resize_height = int(np.ceil(new_width * img_aspect_ratio))
        ver_pad_half = int(np.ceil((new_height - resize_height) / 2))
    else:
        # Otherwise fit the height and pad left/right.
        resize_width = int(np.ceil(new_height / img_aspect_ratio))
        hor_pad_half = int(np.ceil((new_width - resize_width) / 2))

    # Corner pixels of the scaled image inside the canvas, in the order
    # top-left, top-right, bottom-right, bottom-left (x, y).
    img_new_coordinates = np.array(
        [[hor_pad_half, ver_pad_half],
         [hor_pad_half + resize_width - 1, ver_pad_half],
         [hor_pad_half + resize_width - 1, ver_pad_half + resize_height - 1],
         [hor_pad_half, ver_pad_half + resize_height - 1]],
        dtype=np.float32)

    # Matching corner pixels of the original image, same order.
    img_original_coordinates = np.array(
        [[0, 0],
         [img_width - 1, 0],
         [img_width - 1, img_height - 1],
         [0, img_height - 1]],
        dtype=np.float32)

    M = cv2.getPerspectiveTransform(img_original_coordinates, img_new_coordinates)
    out_img = cv2.warpPerspective(img, M, new_shape)

    return out_img, M
def normalize_image_new(rgb_img, shape):
    """Letterbox ``rgb_img`` to ``shape`` and rescale it linearly.

    The image is passed through ``pad_resize`` and each value ``v`` is then
    mapped to ``v * 2.0 / 255 - 1.0`` (so 0 -> -1.0 and 255 -> 1.0).

    Args:
        rgb_img: Input image array.
        shape: Target size as ``(width, height)``, forwarded to ``pad_resize``.

    Returns:
        The rescaled, letterboxed image.
    """
    # The transform matrix returned alongside the image is not needed here.
    letterboxed, _ = pad_resize(rgb_img, shape)
    doubled_fraction = letterboxed * 2.0 / 255
    return doubled_fraction - 1.0
def normalize_image_original(rgb_img, shape):
    """Resize ``rgb_img`` to ``shape`` and standardise it per channel.

    The image is stretched to ``shape`` with ``cv2.INTER_AREA`` interpolation
    (no aspect-ratio preservation), scaled to [0, 1] by dividing by 255, and
    then normalised as ``(x - mean) / std`` with fixed three-value constants
    (presumably the widely used ImageNet statistics - confirm).

    Args:
        rgb_img: Input image array; its last axis must broadcast against the
            three mean/std values.
        shape: Target size as ``(width, height)``.

    Returns:
        The normalised image array.
    """
    # Pass only the keyword that is needed: extra positional arguments after
    # ``dsize`` would bind to ``dst`` and ``fx`` rather than ``fx``/``fy``.
    input_image = cv2.resize(
        rgb_img, shape, interpolation=cv2.INTER_AREA).astype(np.float32)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    return (input_image / 255 - mean) / std
# MediaPipe graph (text protobuf) that runs only the input preprocessing:
# it takes a CPU image on the "image" stream, converts it to a letterboxed
# 192x192 tensor with values in [-1, 1], and exposes the tensor contents as a
# flat list of floats on the "floats" stream.
graph_config = """
  input_stream: "IMAGE:image"
  output_stream: "FLOATS:floats"

  # Converts the input CPU image (ImageFrame) to the multi-backend image type
  # (Image).
  node: {
    calculator: "ToImageCalculator"
    input_stream: "IMAGE_CPU:image"
    output_stream: "IMAGE:multi_backend_image"
  }

  # Transforms the input image into a 192x192 tensor while keeping the aspect
  # ratio (what is expected by the corresponding face detection model), resulting
  # in potential letterboxing in the transformed image.
  node: {
    calculator: "ImageToTensorCalculator"
    input_stream: "IMAGE:multi_backend_image"
    output_stream: "TENSORS:input_tensors"
    output_stream: "MATRIX:transform_matrix"
    options: {
      [mediapipe.ImageToTensorCalculatorOptions.ext] {
        output_tensor_width: 192
        output_tensor_height: 192
        keep_aspect_ratio: true
        output_tensor_float_range {
          min: -1.0
          max: 1.0
        }
        border_mode: BORDER_ZERO
      }
    }
  }

  node: {
    calculator: "TensorsToFloatsCalculator"
    input_stream: "TENSORS:input_tensors"
    output_stream: "FLOATS:floats"
  }
"""
if __name__ == '__main__':
    # Graph that runs only MediaPipe's image-to-tensor preprocessing.
    mediapipe_input_graph = solution_base.SolutionBase(graph_config=graph_config)

    image_path = 'images/test.jpg'
    bgr_img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising; check
    # here so the error names the file rather than surfacing inside cvtColor.
    if bgr_img is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

    # Get mediapipe normalized image
    results = mediapipe_input_graph.process(input_data={'image': rgb_img})
    data = np.array(results.floats)
    # The graph emits a flat float list; restore the 192x192xC layout.
    mediapipe_image = np.reshape(data, (192, 192, -1))

    # Get the custom normalization
    custom_image_original = normalize_image_original(rgb_img, (192, 192))
    custom_image_new = normalize_image_new(rgb_img, (192, 192))

    # Show, for each custom normalization, the largest signed per-channel
    # difference from the MediaPipe tensor at every pixel.
    _, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    ax1.set_title('Original')
    im1 = ax1.imshow(np.max(mediapipe_image - custom_image_original, axis=2),
                     cmap=plt.get_cmap("inferno"))
    plt.colorbar(im1, cax=plt.axes([0.05, 0.1, 0.02, 0.8]))

    ax2.set_title('New')
    im2 = ax2.imshow(np.max(mediapipe_image - custom_image_new, axis=2),
                     cmap=plt.get_cmap("inferno"))
    plt.colorbar(im2, cax=plt.axes([0.93, 0.1, 0.02, 0.8]))

    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment