Created
November 7, 2021 12:45
-
-
Save ibaiGorordo/43022b9a5cd83bbb0efbb1185446a840 to your computer and use it in GitHub Desktop.
Python script to analyze what the model inference is doing in Mediapipe.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# References:
# - Mediapipe face detection models: https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection | |
import os | |
import urllib | |
import mediapipe as mp | |
from mediapipe.python import solution_base | |
import cv2 | |
import numpy as np | |
try: | |
from tflite_runtime.interpreter import Interpreter | |
except: | |
from tensorflow.lite.python.interpreter import Interpreter | |
def download_github_model(model_url, model_path):
    """Download a model file to ``model_path`` unless it already exists.

    Args:
        model_url: URL the model bytes are fetched from.
        model_path: Destination file path. Missing parent directories are
            created before the file is written.

    Nothing is fetched or written when ``model_path`` already exists.
    """
    # Imported here because a bare ``import urllib`` does not guarantee that
    # the ``urllib.request`` submodule has been loaded.
    import urllib.request

    if os.path.exists(model_path):
        return

    # Read the whole payload before touching the destination, so a failed
    # request never leaves an empty file that looks like a finished download.
    with urllib.request.urlopen(model_url) as response:
        model_data = response.read()

    parent_dir = os.path.dirname(model_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(model_path, "wb") as model_file:
        model_file.write(model_data)
# Graph header shared by both graphs built below: declares the input stream
# (tag IMAGE, name "image") and the output stream (tag FLOATS, name "floats").
graph_structure_config = """
input_stream: "IMAGE:image"
output_stream: "FLOATS:floats"
"""
# Pre-processing nodes: wrap the CPU image and convert it to the
# "input_tensors" stream (192x192, values scaled to [-1, 1], aspect ratio
# kept, zero border).
input_config = """
# Converts the input CPU image (ImageFrame) to the multi-backend image type
# (Image).
node: {
  calculator: "ToImageCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "IMAGE:multi_backend_image"
}
# Transforms the input image into a 192x192 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:multi_backend_image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "MATRIX:transform_matrix"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 192
      output_tensor_height: 192
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
    }
  }
}
"""
# Inference node: feeds "input_tensors" to the TFLite model and emits
# "detection_tensors". The model_path below is the same literal the script
# uses as its download destination; keep the two in sync.
inference_config = """
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "models/face_detection_full_range_sparse.tflite"
      delegate {
        xnnpack {}
      }
    }
  }
}
"""
# Output node exposing the pre-processed "input_tensors" stream on the graph's
# "floats" output stream.
input_tensor_to_floats_config = """
node: {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "FLOATS:floats"
}
"""
# Output node exposing the model's "detection_tensors" stream on the graph's
# "floats" output stream.
detection_tensor_to_floats_config = """
node: {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:detection_tensors"
  output_stream: "FLOATS:floats"
}
"""
def _run_graph_floats(graph_config, rgb_img, dtype=None):
    """Run a MediaPipe graph on ``rgb_img`` and return its ``floats`` output.

    Args:
        graph_config: Text graph config with an ``image`` input stream and a
            ``floats`` output stream.
        rgb_img: RGB image passed to the graph as the ``image`` input.
        dtype: Optional NumPy dtype for the returned array.

    Returns:
        NumPy array built from the graph's ``floats`` output.
    """
    # The context manager closes the graph once the values have been copied
    # out into a NumPy array.
    with solution_base.SolutionBase(graph_config=graph_config) as graph:
        outputs = graph.process(input_data={'image': rgb_img})
        return np.array(outputs.floats, dtype=dtype)


def main():
    """Compare MediaPipe's raw model output with a direct TFLite invocation."""
    model_url = "https://github.com/google/mediapipe/blob/master/mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite?raw=true"
    model_path = "models/face_detection_full_range_sparse.tflite"
    image_path = 'images/test.jpg'

    # Download the tflite model from the Mediapipe github
    download_github_model(model_url, model_path)

    # Read the image for testing. cv2.imread signals an unreadable file by
    # returning None, so fail here with a clear message instead of inside
    # cvtColor.
    bgr_img = cv2.imread(image_path)
    if bgr_img is None:
        raise FileNotFoundError(f"Could not read test image: {image_path}")
    rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)

    # --- Mediapipe inference ---
    # Graph performing the image preparation and the model inference.
    inference_graph_config = (graph_structure_config + input_config
                              + inference_config
                              + detection_tensor_to_floats_config)
    mediapipe_raw_results = _run_graph_floats(inference_graph_config, rgb_img)

    # --- TFLite inference ---
    # Graph that only prepares the image, so the TFLite interpreter receives
    # the same normalized input the MediaPipe inference node sees.
    input_graph_config = (graph_structure_config + input_config
                          + input_tensor_to_floats_config)
    input_floats = _run_graph_floats(input_graph_config, rgb_img,
                                     dtype=np.float32)
    # Matches the 192x192 tensor size set in input_config, 3 channels.
    input_tensor = np.reshape(input_floats, (1, 192, 192, 3))

    # Initialize the TFLite model
    interpreter = Interpreter(model_path=model_path, num_threads=4)
    interpreter.allocate_tensors()

    # Perform the inference with the model in TFLite
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']
    interpreter.set_tensor(input_index, input_tensor)
    interpreter.invoke()
    tflite_raw_results = np.squeeze(interpreter.get_tensor(output_index))

    # Use absolute differences: signed differences of opposite sign would
    # cancel out and could hide a real mismatch.
    abs_diff = np.abs(mediapipe_raw_results - tflite_raw_results.flatten())
    print(f"\nSum of absolute differences: {np.sum(abs_diff)}\n")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment