ibaiGorordo · November 7, 2021 12:45
diff --git a/MediapipeModelRawOutputCheckExample.py b/MediapipeModelRawOutputCheckExample.py
 # References:
 # - Mediapipe face detection models: https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection

 import os
 import urllib
 import mediapipe as mp
 from mediapipe.python import solution_base
 import cv2
 import numpy as np

 # Prefer the lightweight tflite_runtime package; fall back to the interpreter
 # bundled with full TensorFlow when tflite_runtime is not installed.
 # Only ImportError is caught so that Ctrl-C or unrelated failures raised while
 # importing are not silently swallowed.
 try:
     from tflite_runtime.interpreter import Interpreter
 except ImportError:
     from tensorflow.lite.python.interpreter import Interpreter

 def download_github_model(model_url, model_path):
     """Download a model file to ``model_path`` unless it already exists.

     Args:
         model_url: URL that serves the raw model bytes.
         model_path: Destination file path. Missing parent directories are
             created before writing.

     Raises:
         urllib.error.URLError: If the URL cannot be opened.
         OSError: If the destination cannot be created or written.
     """
     # ``import urllib`` alone does not guarantee that the ``urllib.request``
     # submodule is loaded, so import it explicitly here.
     import urllib.request

     if os.path.exists(model_path):
         return

     # Fetch the whole payload before touching the filesystem so a failed
     # download does not leave an empty file that later looks like a cached
     # model.
     with urllib.request.urlopen(model_url) as response:
         model_data = response.read()

     # The destination folder (e.g. "models/") may not exist on a fresh checkout.
     parent_dir = os.path.dirname(model_path)
     if parent_dir:
         os.makedirs(parent_dir, exist_ok=True)

     with open(model_path, "wb") as model_file:
         model_file.write(model_data)

 # Graph-level stream declarations shared by both graphs built in the main
 # section: the input stream "image" (tag IMAGE) and the output stream
 # "floats" (tag FLOATS). Node configs are appended to this string.
 graph_structure_config = """
 input_stream: "IMAGE:image"
 output_stream: "FLOATS:floats"
 """

 # Preprocessing nodes: wrap the input image with ToImageCalculator, then use
 # ImageToTensorCalculator to produce a 192x192 tensor scaled to [-1.0, 1.0]
 # (aspect ratio kept, zero border) on the "input_tensors" stream.
 input_config =  """
 # Converts the input CPU image (ImageFrame) to the multi-backend image type
 # (Image).
 node: {
  calculator: "ToImageCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "IMAGE:multi_backend_image"
 }

 # Transforms the input image into a 192x192 tensor while keeping the aspect
 # ratio (what is expected by the corresponding face detection model), resulting
 # in potential letterboxing in the transformed image.
 node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:multi_backend_image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "MATRIX:transform_matrix"
  options: {
 	[mediapipe.ImageToTensorCalculatorOptions.ext] {
 	  output_tensor_width: 192
 	  output_tensor_height: 192
 	  keep_aspect_ratio: true
 	  output_tensor_float_range {
 		min: -1.0
 		max: 1.0
 	  }
 	  border_mode: BORDER_ZERO
 	}
  }
 }
 """


 # Inference node: feeds "input_tensors" to the TFLite model at
 # models/face_detection_full_range_sparse.tflite (XNNPACK delegate) and
 # emits the raw model outputs on the "detection_tensors" stream.
 # NOTE(review): this model_path is duplicated in the main section; keep the
 # two in sync.
 inference_config =  """
 # Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
 # vector of tensors representing, for instance, detection boxes/keypoints and
 # scores.
 node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "models/face_detection_full_range_sparse.tflite"
      delegate {
        xnnpack {}
      }
    }
  }
 }
 """

 # Output node that exposes the preprocessed "input_tensors" stream as the
 # graph's "floats" output, so the normalized image can be read back in Python.
 input_tensor_to_floats_config =  """
 node: {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "FLOATS:floats"
 }
 """

 # Output node that exposes the model's "detection_tensors" stream as the
 # graph's "floats" output.
 detection_tensor_to_floats_config =  """
 node: {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:detection_tensors"
  output_stream: "FLOATS:floats"
 }
 """

 if __name__ == '__main__':
     # Compares the raw face-detection outputs produced by a Mediapipe graph
     # with those produced by running the same tflite model directly through
     # the TFLite interpreter on the same preprocessed input.

     model_url = "https://github.com/google/mediapipe/blob/master/mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite?raw=true"
     model_path = "models/face_detection_full_range_sparse.tflite"

     # Download the tflite model from the Mediapipe github
     download_github_model(model_url, model_path)

     # Read the image for testing. cv2.imread returns None (it does not raise)
     # when the file is missing or cannot be decoded, so fail early with a
     # clear message instead of an opaque cvtColor error.
     image_path = 'images/test.jpg'
     bgr_img = cv2.imread(image_path)
     if bgr_img is None:
         raise FileNotFoundError(f"Could not read test image: {image_path}")
     rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

     # -------- Mediapipe inference --------

     # Generate the mediapipe graph to perform the image preparation and model inference
     graph_config = graph_structure_config+input_config+inference_config+detection_tensor_to_floats_config
     mediapipe_inference_graph = solution_base.SolutionBase(graph_config=graph_config)

     # Get mediapipe raw results
     mediapipe_raw_results = np.array(mediapipe_inference_graph.process(input_data={'image': rgb_img}).floats)

     # -------- TFLite inference --------

     # Generate the mediapipe graph to get the normalized image to pass to the model in tflite
     graph_config = graph_structure_config+input_config+input_tensor_to_floats_config
     mediapipe_input_graph = solution_base.SolutionBase(graph_config=graph_config)

     # Get the normalized image from the graph; the flat float list is reshaped
     # to the 1x192x192x3 layout configured in input_config.
     results = np.array(mediapipe_input_graph.process(input_data={'image': rgb_img}).floats, dtype=np.float32)
     input_tensor = np.reshape(results,(1,192,192,3))

     # Initialize the TFLite model
     interpreter = Interpreter(model_path=model_path, num_threads = 4)
     interpreter.allocate_tensors()

     # Perform the inference with the model in TFLite
     interpreter.set_tensor(interpreter.get_input_details()[0]['index'], input_tensor)
     interpreter.invoke()
     tflite_raw_results = np.squeeze(interpreter.get_tensor(interpreter.get_output_details()[0]['index']))

     # Compare element-wise using absolute values: a signed sum lets positive
     # and negative errors cancel and can report ~0 for mismatching outputs.
     abs_differences = np.abs(mediapipe_raw_results - tflite_raw_results.flatten())
     print(f"\nSum of absolute differences: {np.sum(abs_differences)}\n")
	# References:
	# - Mediapipe face detection models: https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection

	import os
	import urllib
	import mediapipe as mp
	from mediapipe.python import solution_base
	import cv2
	import numpy as np

	# Prefer the lightweight tflite_runtime package; fall back to the interpreter
	# bundled with full TensorFlow when tflite_runtime is not installed.
	# Only ImportError is caught so that Ctrl-C or unrelated failures raised while
	# importing are not silently swallowed.
	try:
	    from tflite_runtime.interpreter import Interpreter
	except ImportError:
	    from tensorflow.lite.python.interpreter import Interpreter

	def download_github_model(model_url, model_path):
	    """Download a model file to ``model_path`` unless it already exists.

	    Args:
	        model_url: URL that serves the raw model bytes.
	        model_path: Destination file path. Missing parent directories are
	            created before writing.

	    Raises:
	        urllib.error.URLError: If the URL cannot be opened.
	        OSError: If the destination cannot be created or written.
	    """
	    # ``import urllib`` alone does not guarantee that the ``urllib.request``
	    # submodule is loaded, so import it explicitly here.
	    import urllib.request

	    if os.path.exists(model_path):
	        return

	    # Fetch the whole payload before touching the filesystem so a failed
	    # download does not leave an empty file that later looks like a cached
	    # model.
	    with urllib.request.urlopen(model_url) as response:
	        model_data = response.read()

	    # The destination folder (e.g. "models/") may not exist on a fresh checkout.
	    parent_dir = os.path.dirname(model_path)
	    if parent_dir:
	        os.makedirs(parent_dir, exist_ok=True)

	    with open(model_path, "wb") as model_file:
	        model_file.write(model_data)

	# Graph-level stream declarations shared by both graphs built in the main
	# section: the input stream "image" (tag IMAGE) and the output stream
	# "floats" (tag FLOATS). Node configs are appended to this string.
	graph_structure_config = """
	input_stream: "IMAGE:image"
	output_stream: "FLOATS:floats"
	"""

	# Preprocessing nodes: wrap the input image with ToImageCalculator, then use
	# ImageToTensorCalculator to produce a 192x192 tensor scaled to [-1.0, 1.0]
	# (aspect ratio kept, zero border) on the "input_tensors" stream.
	input_config = """
	# Converts the input CPU image (ImageFrame) to the multi-backend image type
	# (Image).
	node: {
	calculator: "ToImageCalculator"
	input_stream: "IMAGE_CPU:image"
	output_stream: "IMAGE:multi_backend_image"
	}

	# Transforms the input image into a 192x192 tensor while keeping the aspect
	# ratio (what is expected by the corresponding face detection model), resulting
	# in potential letterboxing in the transformed image.
	node: {
	calculator: "ImageToTensorCalculator"
	input_stream: "IMAGE:multi_backend_image"
	output_stream: "TENSORS:input_tensors"
	output_stream: "MATRIX:transform_matrix"
	options: {
	[mediapipe.ImageToTensorCalculatorOptions.ext] {
	output_tensor_width: 192
	output_tensor_height: 192
	keep_aspect_ratio: true
	output_tensor_float_range {
	min: -1.0
	max: 1.0
	}
	border_mode: BORDER_ZERO
	}
	}
	}
	"""


	# Inference node: feeds "input_tensors" to the TFLite model at
	# models/face_detection_full_range_sparse.tflite (XNNPACK delegate) and
	# emits the raw model outputs on the "detection_tensors" stream.
	# NOTE(review): this model_path is duplicated in the main section; keep the
	# two in sync.
	inference_config = """
	# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
	# vector of tensors representing, for instance, detection boxes/keypoints and
	# scores.
	node {
	calculator: "InferenceCalculator"
	input_stream: "TENSORS:input_tensors"
	output_stream: "TENSORS:detection_tensors"
	options: {
	[mediapipe.InferenceCalculatorOptions.ext] {
	model_path: "models/face_detection_full_range_sparse.tflite"
	delegate {
	xnnpack {}
	}
	}
	}
	}
	"""

	# Output node that exposes the preprocessed "input_tensors" stream as the
	# graph's "floats" output, so the normalized image can be read back in Python.
	input_tensor_to_floats_config = """
	node: {
	calculator: "TensorsToFloatsCalculator"
	input_stream: "TENSORS:input_tensors"
	output_stream: "FLOATS:floats"
	}
	"""

	# Output node that exposes the model's "detection_tensors" stream as the
	# graph's "floats" output.
	detection_tensor_to_floats_config = """
	node: {
	calculator: "TensorsToFloatsCalculator"
	input_stream: "TENSORS:detection_tensors"
	output_stream: "FLOATS:floats"
	}
	"""

	if __name__ == '__main__':
	    # Compares the raw face-detection outputs produced by a Mediapipe graph
	    # with those produced by running the same tflite model directly through
	    # the TFLite interpreter on the same preprocessed input.

	    model_url = "https://github.com/google/mediapipe/blob/master/mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite?raw=true"
	    model_path = "models/face_detection_full_range_sparse.tflite"

	    # Download the tflite model from the Mediapipe github
	    download_github_model(model_url, model_path)

	    # Read the image for testing. cv2.imread returns None (it does not raise)
	    # when the file is missing or cannot be decoded, so fail early with a
	    # clear message instead of an opaque cvtColor error.
	    image_path = 'images/test.jpg'
	    bgr_img = cv2.imread(image_path)
	    if bgr_img is None:
	        raise FileNotFoundError(f"Could not read test image: {image_path}")
	    rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

	    # -------- Mediapipe inference --------

	    # Generate the mediapipe graph to perform the image preparation and model inference
	    graph_config = graph_structure_config+input_config+inference_config+detection_tensor_to_floats_config
	    mediapipe_inference_graph = solution_base.SolutionBase(graph_config=graph_config)

	    # Get mediapipe raw results
	    mediapipe_raw_results = np.array(mediapipe_inference_graph.process(input_data={'image': rgb_img}).floats)

	    # -------- TFLite inference --------

	    # Generate the mediapipe graph to get the normalized image to pass to the model in tflite
	    graph_config = graph_structure_config+input_config+input_tensor_to_floats_config
	    mediapipe_input_graph = solution_base.SolutionBase(graph_config=graph_config)

	    # Get the normalized image from the graph; the flat float list is reshaped
	    # to the 1x192x192x3 layout configured in input_config.
	    results = np.array(mediapipe_input_graph.process(input_data={'image': rgb_img}).floats, dtype=np.float32)
	    input_tensor = np.reshape(results,(1,192,192,3))

	    # Initialize the TFLite model
	    interpreter = Interpreter(model_path=model_path, num_threads = 4)
	    interpreter.allocate_tensors()

	    # Perform the inference with the model in TFLite
	    interpreter.set_tensor(interpreter.get_input_details()[0]['index'], input_tensor)
	    interpreter.invoke()
	    tflite_raw_results = np.squeeze(interpreter.get_tensor(interpreter.get_output_details()[0]['index']))

	    # Compare element-wise using absolute values: a signed sum lets positive
	    # and negative errors cancel and can report ~0 for mismatching outputs.
	    abs_differences = np.abs(mediapipe_raw_results - tflite_raw_results.flatten())
	    print(f"\nSum of absolute differences: {np.sum(abs_differences)}\n")