Skip to content

Instantly share code, notes, and snippets.

@ibaiGorordo
Last active November 7, 2021 12:46
Show Gist options
  • Save ibaiGorordo/208635f2bb8a4ac54f21914cd492c8bd to your computer and use it in GitHub Desktop.
Save ibaiGorordo/208635f2bb8a4ac54f21914cd492c8bd to your computer and use it in GitHub Desktop.
Python script to analyze the input image transformation of the Mediapipe face detection model.
# References:
# - Resize and pad: https://stackoverflow.com/questions/44720580/resize-image-canvas-to-maintain-square-aspect-ratio-in-python-opencv
# - Mediapipe image scale and offset: https://github.com/google/mediapipe/blob/ecb5b5f44ab23ea620ef97a479407c699e424aa7/mediapipe/calculators/tensor/image_to_tensor_utils.cc#L79
# - Mediapipe OpenCV image resize: https://github.com/google/mediapipe/blob/ecb5b5f44ab23ea620ef97a479407c699e424aa7/mediapipe/framework/deps/image_resizer.h#L27
import mediapipe as mp
from mediapipe.python import solution_base
import cv2
import numpy as np
import matplotlib.pyplot as plt
def pad_resize(img, new_shape):
    """Letterbox ``img`` into ``new_shape`` while keeping its aspect ratio.

    The image is scaled so that it fits entirely inside the target canvas and
    is centered along the axis that has spare room. The scaling and the
    translation are expressed as a single perspective transform mapping the
    corner pixels of the source image onto the corner pixels of the resized
    region in the target canvas.

    Args:
        img: Input image array whose first two axes are (height, width).
            Both 2-D (grayscale) and 3-D (height, width, channels) arrays
            are accepted.
        new_shape: Target size as ``(width, height)``; it is also passed
            unchanged to ``cv2.warpPerspective`` as the output size.

    Returns:
        A tuple ``(out_img, M)`` where ``out_img`` is the warped image of
        size ``new_shape`` and ``M`` is the matrix returned by
        ``cv2.getPerspectiveTransform`` that maps source pixel coordinates
        to output pixel coordinates.
    """
    # Only height and width are needed, so do not require a channel axis.
    img_height, img_width = img.shape[:2]
    new_width, new_height = new_shape

    img_aspect_ratio = img_height / img_width
    new_aspect_ratio = new_height / new_width

    # Start from "fill the whole canvas" and shrink one side below.
    ver_pad_half = 0
    hor_pad_half = 0
    resize_height = new_height
    resize_width = new_width

    if new_aspect_ratio > img_aspect_ratio:
        # Target is relatively taller than the image: fit the width and
        # pad above/below.
        resize_height = int(np.ceil(new_width * img_aspect_ratio))
        ver_pad_half = int(np.ceil((new_height - resize_height) / 2))
    else:
        # Target is relatively wider (or equal): fit the height and pad
        # left/right.
        resize_width = int(np.ceil(new_height / img_aspect_ratio))
        hor_pad_half = int(np.ceil((new_width - resize_width) / 2))

    # Corner pixels of the resized region inside the output canvas
    # (top-left, top-right, bottom-right, bottom-left).
    img_new_coordinates = np.array(
        [
            [hor_pad_half, ver_pad_half],
            [hor_pad_half + resize_width - 1, ver_pad_half],
            [hor_pad_half + resize_width - 1, ver_pad_half + resize_height - 1],
            [hor_pad_half, ver_pad_half + resize_height - 1],
        ],
        dtype=np.float32,
    )

    # Matching corner pixels of the source image, in the same order.
    img_original_coordinates = np.array(
        [
            [0, 0],
            [img_width - 1, 0],
            [img_width - 1, img_height - 1],
            [0, img_height - 1],
        ],
        dtype=np.float32,
    )

    M = cv2.getPerspectiveTransform(img_original_coordinates, img_new_coordinates)
    # warpPerspective allocates the output itself; no pre-allocated buffer
    # is required.
    out_img = cv2.warpPerspective(img, M, new_shape)
    return out_img, M
def normalize_image_new(rgb_img, shape):
    """Letterbox ``rgb_img`` to ``shape`` and rescale its values.

    The image is resized with ``pad_resize`` and each value ``v`` is mapped
    to ``v * 2.0 / 255 - 1.0`` (so 0 maps to -1.0 and 255 maps to 1.0).

    Args:
        rgb_img: Input image array, forwarded to ``pad_resize``.
        shape: Target size as ``(width, height)``, forwarded to
            ``pad_resize``.

    Returns:
        The rescaled, letterboxed image.
    """
    # The transform matrix returned alongside the image is not needed here.
    letterboxed, _ = pad_resize(rgb_img, shape)
    scaled = letterboxed * 2.0 / 255
    return scaled - 1.0
def normalize_image_original(rgb_img, shape):
    """Resize ``rgb_img`` to ``shape`` and standardize it per channel.

    The image is stretched to ``shape`` (aspect ratio is NOT preserved) with
    area interpolation, scaled by 1/255, and then normalized channel-wise as
    ``(value - mean) / std`` using the constants below.

    Args:
        rgb_img: Input image array with three channels in its last axis
            (the mean/std lists have three entries).
        shape: Target size as ``(width, height)``, passed to ``cv2.resize``
            as ``dsize``.

    Returns:
        The resized and standardized image as a floating-point array.
    """
    # Only dsize and interpolation are passed. The previous positional
    # ``0, 0`` actually bound to ``dst`` and ``fx`` (not ``fx``/``fy``);
    # both scale factors are ignored anyway when dsize is non-zero.
    input_image = cv2.resize(
        rgb_img, shape, interpolation=cv2.INTER_AREA
    ).astype(np.float32)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    return (input_image / 255 - mean) / std
# Mediapipe graph definition (text protobuf) used to reproduce the input
# preprocessing of the face detection model:
#   1. ToImageCalculator wraps the incoming "image" stream.
#   2. ImageToTensorCalculator produces a 192x192 tensor with
#      keep_aspect_ratio enabled, values in [-1.0, 1.0] and BORDER_ZERO.
#   3. TensorsToFloatsCalculator exposes the tensor on the "floats" stream.
# The string is consumed at runtime, so its contents must not be altered.
graph_config = """
input_stream: "IMAGE:image"
output_stream: "FLOATS:floats"
# Converts the input CPU image (ImageFrame) to the multi-backend image type
# (Image).
node: {
calculator: "ToImageCalculator"
input_stream: "IMAGE_CPU:image"
output_stream: "IMAGE:multi_backend_image"
}
# Transforms the input image into a 192x192 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
calculator: "ImageToTensorCalculator"
input_stream: "IMAGE:multi_backend_image"
output_stream: "TENSORS:input_tensors"
output_stream: "MATRIX:transform_matrix"
options: {
[mediapipe.ImageToTensorCalculatorOptions.ext] {
output_tensor_width: 192
output_tensor_height: 192
keep_aspect_ratio: true
output_tensor_float_range {
min: -1.0
max: 1.0
}
border_mode: BORDER_ZERO
}
}
}
node: {
calculator: "TensorsToFloatsCalculator"
input_stream: "TENSORS:input_tensors"
output_stream: "FLOATS:floats"
}
"""
if __name__ == '__main__':
    # Compare Mediapipe's own input preprocessing against the two custom
    # normalization functions and plot the per-pixel differences.
    mediapipe_input_graph = solution_base.SolutionBase(graph_config=graph_config)

    image_path = 'images/test.jpg'
    bgr_img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an opaque cvtColor error.
    if bgr_img is None:
        raise FileNotFoundError("Could not read image: " + image_path)
    rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

    # Get mediapipe normalized image
    results = mediapipe_input_graph.process(input_data={'image': rgb_img})
    data = np.array(results.floats)
    # The graph outputs a flat list of floats; restore the 192x192 layout.
    mediapipe_image = np.reshape(data, (192, 192, -1))

    # Get the custom normalization
    custom_image_original = normalize_image_original(rgb_img, (192, 192))
    custom_image_new = normalize_image_new(rgb_img, (192, 192))

    # Show, for each method, the maximum difference across channels.
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    ax1.set_title('Original')
    im1 = ax1.imshow(
        np.max(mediapipe_image - custom_image_original, axis=2),
        cmap=plt.get_cmap("inferno"),
    )
    plt.colorbar(im1, cax=plt.axes([0.05, 0.1, 0.02, 0.8]))

    ax2.set_title('New')
    im2 = ax2.imshow(
        np.max(mediapipe_image - custom_image_new, axis=2),
        cmap=plt.get_cmap("inferno"),
    )
    plt.colorbar(im2, cax=plt.axes([0.93, 0.1, 0.02, 0.8]))

    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment