ibaiGorordo · November 7, 2021 12:46
diff --git a/MediapipModelInputCheckExample.py b/MediapipModelInputCheckExample.py
 # References:
 # - Resize and pad: https://stackoverflow.com/questions/44720580/resize-image-canvas-to-maintain-square-aspect-ratio-in-python-opencv
 # - Mediapipe image scale and offset: https://github.com/google/mediapipe/blob/ecb5b5f44ab23ea620ef97a479407c699e424aa7/mediapipe/calculators/tensor/image_to_tensor_utils.cc#L79
 # - Mediapipe OpenCV image resize: https://github.com/google/mediapipe/blob/ecb5b5f44ab23ea620ef97a479407c699e424aa7/mediapipe/framework/deps/image_resizer.h#L27

 import mediapipe as mp
 from mediapipe.python import solution_base
 import cv2
 import numpy as np
 import matplotlib.pyplot as plt

 def pad_resize(img, new_shape):
 	"""Fit ``img`` inside a ``new_shape`` canvas while keeping its aspect ratio.

 	The image is scaled to the largest size that fits the target and is
 	centred on it. The mapping is expressed as a perspective transform
 	between the corner pixels of the source image and the corner pixels of
 	the scaled region, and is applied with ``cv2.warpPerspective`` using its
 	default interpolation and border settings.

 	Args:
 		img: Image array whose first two shape entries are (height, width).
 			Both 2-D (single channel) and 3-D (H, W, C) arrays are accepted.
 		new_shape: Output size as ``(width, height)``.

 	Returns:
 		Tuple ``(out_img, M)``: the warped image and the transform matrix
 		returned by ``cv2.getPerspectiveTransform``.
 	"""
 	# Only height and width are needed, so do not require a channel axis.
 	img_height, img_width = img.shape[:2]
 	new_width, new_height = new_shape

 	img_aspect_ratio = img_height / img_width
 	new_aspect_ratio = new_height / new_width

 	ver_pad_half = 0
 	hor_pad_half = 0
 	resize_height = new_height
 	resize_width = new_width

 	if new_aspect_ratio > img_aspect_ratio:
 		# Target is relatively taller than the image: use the full width
 		# and split the leftover rows between top and bottom.
 		resize_height = int(np.ceil(new_width * img_aspect_ratio))
 		ver_pad_half = int(np.ceil((new_height - resize_height) / 2))
 	else:
 		# Target is relatively wider (or equal): use the full height and
 		# split the leftover columns between left and right.
 		resize_width = int(np.ceil(new_height / img_aspect_ratio))
 		hor_pad_half = int(np.ceil((new_width - resize_width) / 2))

 	# Corners are listed clockwise from the top-left, in (x, y) order, and
 	# use inclusive pixel indices (hence the "- 1").
 	left, top = hor_pad_half, ver_pad_half
 	right = hor_pad_half + resize_width - 1
 	bottom = ver_pad_half + resize_height - 1
 	img_new_coordinates = np.array(
 		[[left, top], [right, top], [right, bottom], [left, bottom]],
 		dtype=np.float32)

 	img_original_coordinates = np.array(
 		[[0, 0],
 		 [img_width - 1, 0],
 		 [img_width - 1, img_height - 1],
 		 [0, img_height - 1]],
 		dtype=np.float32)

 	M = cv2.getPerspectiveTransform(img_original_coordinates, img_new_coordinates)
 	out_img = cv2.warpPerspective(img, M, new_shape)

 	return out_img, M

 def normalize_image_new(rgb_img, shape):
 	"""Letterbox ``rgb_img`` to ``shape`` and rescale its pixel values.

 	The image is first passed through ``pad_resize``; each value ``x`` of the
 	result is then mapped to ``x * 2 / 255 - 1`` (so 0 becomes -1 and 255
 	becomes 1).

 	Args:
 		rgb_img: Image array forwarded to ``pad_resize``.
 		shape: Output size as ``(width, height)``, forwarded to ``pad_resize``.

 	Returns:
 		The rescaled floating point image.
 	"""
 	# Only the warped image is used here; the transform matrix is dropped.
 	letterboxed, _ = pad_resize(rgb_img, shape)
 	doubled = letterboxed * 2.0
 	return doubled / 255 - 1.0

 def normalize_image_original(rgb_img, shape):
 	"""Resize ``rgb_img`` to ``shape`` and standardise it per channel.

 	The image is stretched to the target size with area interpolation (the
 	aspect ratio is not preserved), scaled by 1/255, and then has a fixed
 	per-channel mean subtracted and is divided by a fixed per-channel
 	standard deviation.

 	Args:
 		rgb_img: Image array; the three-element mean/std lists are broadcast
 			against its last axis.
 		shape: Output size as ``(width, height)``.

 	Returns:
 		The standardised floating point image.
 	"""
 	# Pass the interpolation by keyword and nothing else: the third and
 	# fourth positional parameters of cv2.resize are ``dst`` and ``fx``, so
 	# extra positional zeros would not mean "fx=0, fy=0". With an explicit
 	# dsize the scale factors are not needed at all.
 	input_image = cv2.resize(rgb_img, shape, interpolation=cv2.INTER_AREA).astype(np.float32)

 	# Per-channel statistics (these match the commonly used ImageNet values).
 	mean = [0.485, 0.456, 0.406]
 	std = [0.229, 0.224, 0.225]
 	return (input_image / 255 - mean) / std


 # MediaPipe graph definition in text format, handed to SolutionBase in the
 # script section of this file. It declares one input stream ("image") and one
 # output stream ("floats") and chains three calculators:
 #   ToImageCalculator -> ImageToTensorCalculator -> TensorsToFloatsCalculator
 # The tensor conversion is configured for a 192x192 output with the aspect
 # ratio kept, values in [-1.0, 1.0] and BORDER_ZERO as border mode.
 graph_config = """
 input_stream: "IMAGE:image"
 output_stream: "FLOATS:floats"

 # Converts the input CPU image (ImageFrame) to the multi-backend image type
 # (Image).
 node: {
  calculator: "ToImageCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "IMAGE:multi_backend_image"
 }

 # Transforms the input image into a 192x192 tensor while keeping the aspect
 # ratio (what is expected by the corresponding face detection model), resulting
 # in potential letterboxing in the transformed image.
 node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:multi_backend_image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "MATRIX:transform_matrix"
  options: {
 	[mediapipe.ImageToTensorCalculatorOptions.ext] {
 	  output_tensor_width: 192
 	  output_tensor_height: 192
 	  keep_aspect_ratio: true
 	  output_tensor_float_range {
 		min: -1.0
 		max: 1.0
 	  }
 	  border_mode: BORDER_ZERO
 	}
  }
 }

 node: {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "FLOATS:floats"
 }
 """

 if __name__ == '__main__':

 	# cv2.imread returns None instead of raising when the file cannot be
 	# read, so check it here to fail with a clear message.
 	image_path = 'images/test.jpg'
 	bgr_img = cv2.imread(image_path)
 	if bgr_img is None:
 		raise FileNotFoundError('Could not read image: %s' % image_path)
 	rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

 	# Get mediapipe normalized image. The graph is used as a context manager
 	# so it is closed once its output has been copied into a numpy array.
 	with solution_base.SolutionBase(graph_config=graph_config) as mediapipe_input_graph:
 		results = mediapipe_input_graph.process(input_data={'image': rgb_img})
 		data = np.array(results.floats)
 	# The flat float list is folded back into 192x192 with the channel count
 	# inferred from its length.
 	mediapipe_image = np.reshape(data,(192,192,-1))

 	# Get the custom normalization
 	custom_image_original = normalize_image_original(rgb_img, (192,192))
 	custom_image_new = normalize_image_new(rgb_img, (192,192))

 	# Show, for each custom normalization, the largest signed per-pixel
 	# difference across channels against the mediapipe result.
 	f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
 	ax1.set_title('Original')
 	im1 = ax1.imshow(np.max(mediapipe_image-custom_image_original,axis=2), cmap=plt.get_cmap("inferno"))
 	plt.colorbar(im1, cax=plt.axes([0.05, 0.1, 0.02, 0.8]))
 	ax2.set_title('New')
 	im2 = ax2.imshow(np.max(mediapipe_image-custom_image_new,axis=2), cmap=plt.get_cmap("inferno"))
 	plt.colorbar(im2, cax=plt.axes([0.93, 0.1, 0.02, 0.8]))
 	plt.show()
	# References:
	# - Resize and pad: https://stackoverflow.com/questions/44720580/resize-image-canvas-to-maintain-square-aspect-ratio-in-python-opencv
	# - Mediapipe image scale and offset: https://github.com/google/mediapipe/blob/ecb5b5f44ab23ea620ef97a479407c699e424aa7/mediapipe/calculators/tensor/image_to_tensor_utils.cc#L79
	# - Mediapipe OpenCV image resize: https://github.com/google/mediapipe/blob/ecb5b5f44ab23ea620ef97a479407c699e424aa7/mediapipe/framework/deps/image_resizer.h#L27

	import mediapipe as mp
	from mediapipe.python import solution_base
	import cv2
	import numpy as np
	import matplotlib.pyplot as plt

	def pad_resize(img, new_shape):
		"""Fit ``img`` inside a ``new_shape`` canvas while keeping its aspect ratio.

		The image is scaled to the largest size that fits the target and is
		centred on it. The mapping is expressed as a perspective transform
		between the corner pixels of the source image and the corner pixels of
		the scaled region, and is applied with ``cv2.warpPerspective`` using its
		default interpolation and border settings.

		Args:
			img: Image array whose first two shape entries are (height, width).
				Both 2-D (single channel) and 3-D (H, W, C) arrays are accepted.
			new_shape: Output size as ``(width, height)``.

		Returns:
			Tuple ``(out_img, M)``: the warped image and the transform matrix
			returned by ``cv2.getPerspectiveTransform``.
		"""
		# Only height and width are needed, so do not require a channel axis.
		img_height, img_width = img.shape[:2]
		new_width, new_height = new_shape

		img_aspect_ratio = img_height / img_width
		new_aspect_ratio = new_height / new_width

		ver_pad_half = 0
		hor_pad_half = 0
		resize_height = new_height
		resize_width = new_width

		if new_aspect_ratio > img_aspect_ratio:
			# Target is relatively taller than the image: use the full width
			# and split the leftover rows between top and bottom.
			resize_height = int(np.ceil(new_width * img_aspect_ratio))
			ver_pad_half = int(np.ceil((new_height - resize_height) / 2))
		else:
			# Target is relatively wider (or equal): use the full height and
			# split the leftover columns between left and right.
			resize_width = int(np.ceil(new_height / img_aspect_ratio))
			hor_pad_half = int(np.ceil((new_width - resize_width) / 2))

		# Corners are listed clockwise from the top-left, in (x, y) order, and
		# use inclusive pixel indices (hence the "- 1").
		left, top = hor_pad_half, ver_pad_half
		right = hor_pad_half + resize_width - 1
		bottom = ver_pad_half + resize_height - 1
		img_new_coordinates = np.array(
			[[left, top], [right, top], [right, bottom], [left, bottom]],
			dtype=np.float32)

		img_original_coordinates = np.array(
			[[0, 0],
			 [img_width - 1, 0],
			 [img_width - 1, img_height - 1],
			 [0, img_height - 1]],
			dtype=np.float32)

		M = cv2.getPerspectiveTransform(img_original_coordinates, img_new_coordinates)
		out_img = cv2.warpPerspective(img, M, new_shape)

		return out_img, M

	def normalize_image_new(rgb_img, shape):
		"""Letterbox ``rgb_img`` to ``shape`` and rescale its pixel values.

		The image is first passed through ``pad_resize``; each value ``x`` of the
		result is then mapped to ``x * 2 / 255 - 1`` (so 0 becomes -1 and 255
		becomes 1).

		Args:
			rgb_img: Image array forwarded to ``pad_resize``.
			shape: Output size as ``(width, height)``, forwarded to ``pad_resize``.

		Returns:
			The rescaled floating point image.
		"""
		# Only the warped image is used here; the transform matrix is dropped.
		input_image, _ = pad_resize(rgb_img, shape)

		return input_image * 2.0 / 255 - 1.0

	def normalize_image_original(rgb_img, shape):
		"""Resize ``rgb_img`` to ``shape`` and standardise it per channel.

		The image is stretched to the target size with area interpolation (the
		aspect ratio is not preserved), scaled by 1/255, and then has a fixed
		per-channel mean subtracted and is divided by a fixed per-channel
		standard deviation.

		Args:
			rgb_img: Image array; the three-element mean/std lists are broadcast
				against its last axis.
			shape: Output size as ``(width, height)``.

		Returns:
			The standardised floating point image.
		"""
		# Pass the interpolation by keyword and nothing else: the third and
		# fourth positional parameters of cv2.resize are ``dst`` and ``fx``, so
		# extra positional zeros would not mean "fx=0, fy=0". With an explicit
		# dsize the scale factors are not needed at all.
		input_image = cv2.resize(rgb_img, shape, interpolation=cv2.INTER_AREA).astype(np.float32)

		# Per-channel statistics (these match the commonly used ImageNet values).
		mean = [0.485, 0.456, 0.406]
		std = [0.229, 0.224, 0.225]
		return (input_image / 255 - mean) / std


	# MediaPipe graph definition in text format, handed to SolutionBase in the
	# script section of this file. It declares one input stream ("image") and one
	# output stream ("floats") and chains three calculators:
	#   ToImageCalculator -> ImageToTensorCalculator -> TensorsToFloatsCalculator
	# The tensor conversion is configured for a 192x192 output with the aspect
	# ratio kept, values in [-1.0, 1.0] and BORDER_ZERO as border mode.
	graph_config = """
	input_stream: "IMAGE:image"
	output_stream: "FLOATS:floats"

	# Converts the input CPU image (ImageFrame) to the multi-backend image type
	# (Image).
	node: {
	calculator: "ToImageCalculator"
	input_stream: "IMAGE_CPU:image"
	output_stream: "IMAGE:multi_backend_image"
	}

	# Transforms the input image into a 192x192 tensor while keeping the aspect
	# ratio (what is expected by the corresponding face detection model), resulting
	# in potential letterboxing in the transformed image.
	node: {
	calculator: "ImageToTensorCalculator"
	input_stream: "IMAGE:multi_backend_image"
	output_stream: "TENSORS:input_tensors"
	output_stream: "MATRIX:transform_matrix"
	options: {
	[mediapipe.ImageToTensorCalculatorOptions.ext] {
	output_tensor_width: 192
	output_tensor_height: 192
	keep_aspect_ratio: true
	output_tensor_float_range {
	min: -1.0
	max: 1.0
	}
	border_mode: BORDER_ZERO
	}
	}
	}

	node: {
	calculator: "TensorsToFloatsCalculator"
	input_stream: "TENSORS:input_tensors"
	output_stream: "FLOATS:floats"
	}
	"""

	if __name__ == '__main__':

		# cv2.imread returns None instead of raising when the file cannot be
		# read, so check it here to fail with a clear message.
		image_path = 'images/test.jpg'
		bgr_img = cv2.imread(image_path)
		if bgr_img is None:
			raise FileNotFoundError('Could not read image: %s' % image_path)
		rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

		# Get mediapipe normalized image. The graph is used as a context manager
		# so it is closed once its output has been copied into a numpy array.
		with solution_base.SolutionBase(graph_config=graph_config) as mediapipe_input_graph:
			results = mediapipe_input_graph.process(input_data={'image': rgb_img})
			data = np.array(results.floats)
		# The flat float list is folded back into 192x192 with the channel count
		# inferred from its length.
		mediapipe_image = np.reshape(data,(192,192,-1))

		# Get the custom normalization
		custom_image_original = normalize_image_original(rgb_img, (192,192))
		custom_image_new = normalize_image_new(rgb_img, (192,192))

		# Show, for each custom normalization, the largest signed per-pixel
		# difference across channels against the mediapipe result.
		f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)
		ax1.set_title('Original')
		im1 = ax1.imshow(np.max(mediapipe_image-custom_image_original,axis=2), cmap=plt.get_cmap("inferno"))
		plt.colorbar(im1, cax=plt.axes([0.05, 0.1, 0.02, 0.8]))
		ax2.set_title('New')
		im2 = ax2.imshow(np.max(mediapipe_image-custom_image_new,axis=2), cmap=plt.get_cmap("inferno"))
		plt.colorbar(im2, cax=plt.axes([0.93, 0.1, 0.02, 0.8]))
		plt.show()