Python script to analyze what the model inference is doing in Mediapipe.
# References:
# - Mediapipe face detection models: https://github.com/google/mediapipe/tree/master/mediapipe/modules/face_detection
import os
import urllib.request
import mediapipe as mp
from mediapipe.python import solution_base
import cv2
import numpy as np

try:
    from tflite_runtime.interpreter import Interpreter
except ImportError:
    from tensorflow.lite.python.interpreter import Interpreter

def download_github_model(model_url, model_path):
    if not os.path.exists(model_path):
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        model_data = urllib.request.urlopen(model_url).read()
        with open(model_path, "wb") as f:
            f.write(model_data)

graph_structure_config = """
input_stream: "IMAGE:image"
output_stream: "FLOATS:floats"
"""
input_config = """
# Converts the input CPU image (ImageFrame) to the multi-backend image type
# (Image).
node: {
  calculator: "ToImageCalculator"
  input_stream: "IMAGE_CPU:image"
  output_stream: "IMAGE:multi_backend_image"
}

# Transforms the input image into a 192x192 tensor while keeping the aspect
# ratio (what is expected by the corresponding face detection model), resulting
# in potential letterboxing in the transformed image.
node: {
  calculator: "ImageToTensorCalculator"
  input_stream: "IMAGE:multi_backend_image"
  output_stream: "TENSORS:input_tensors"
  output_stream: "MATRIX:transform_matrix"
  options: {
    [mediapipe.ImageToTensorCalculatorOptions.ext] {
      output_tensor_width: 192
      output_tensor_height: 192
      keep_aspect_ratio: true
      output_tensor_float_range {
        min: -1.0
        max: 1.0
      }
      border_mode: BORDER_ZERO
    }
  }
}
"""
inference_config = """
# Runs a TensorFlow Lite model on CPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
  calculator: "InferenceCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "TENSORS:detection_tensors"
  options: {
    [mediapipe.InferenceCalculatorOptions.ext] {
      model_path: "models/face_detection_full_range_sparse.tflite"
      delegate {
        xnnpack {}
      }
    }
  }
}
"""
input_tensor_to_floats_config = """
node: {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:input_tensors"
  output_stream: "FLOATS:floats"
}
"""

detection_tensor_to_floats_config = """
node: {
  calculator: "TensorsToFloatsCalculator"
  input_stream: "TENSORS:detection_tensors"
  output_stream: "FLOATS:floats"
}
"""

if __name__ == '__main__':

    model_url = "https://github.com/google/mediapipe/blob/master/mediapipe/modules/face_detection/face_detection_full_range_sparse.tflite?raw=true"
    model_path = "models/face_detection_full_range_sparse.tflite"

    # Download the tflite model from the Mediapipe github
    download_github_model(model_url, model_path)

    # Read the image for testing
    rgb_img = cv2.cvtColor(cv2.imread('images/test.jpg'), cv2.COLOR_BGR2RGB)

    """ Mediapipe Inference """
    # Generate the mediapipe graph to perform the image preparation and model inference
    graph_config = graph_structure_config + input_config + inference_config + detection_tensor_to_floats_config
    mediapipe_inference_graph = solution_base.SolutionBase(graph_config=graph_config)

    # Get mediapipe raw results
    mediapipe_raw_results = np.array(mediapipe_inference_graph.process(input_data={'image': rgb_img}).floats)

    """ TFLite Inference """
    # Generate the mediapipe graph to get the normalized image to pass to the model in tflite
    graph_config = graph_structure_config + input_config + input_tensor_to_floats_config
    mediapipe_input_graph = solution_base.SolutionBase(graph_config=graph_config)

    # Get the normalized image from the graph
    results = np.array(mediapipe_input_graph.process(input_data={'image': rgb_img}).floats, dtype=np.float32)
    input_tensor = np.reshape(results, (1, 192, 192, 3))

    # Initialize the TFLite model
    interpreter = Interpreter(model_path=model_path, num_threads=4)
    interpreter.allocate_tensors()

    # Perform the inference with the model in TFLite
    interpreter.set_tensor(interpreter.get_input_details()[0]['index'], input_tensor)
    interpreter.invoke()
    tflite_raw_results = np.squeeze(interpreter.get_tensor(interpreter.get_output_details()[0]['index']))

    print(f"\nSum of differences: {np.sum(mediapipe_raw_results - tflite_raw_results.flatten())}\n")
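
    # A stricter check than the raw sum (a sketch; the tolerance is an
    # assumption): both paths run the same TFLite model on the same
    # preprocessed tensor, so the outputs should match almost exactly.
    diff = mediapipe_raw_results - tflite_raw_results.flatten()
    print(f"Max absolute difference: {np.max(np.abs(diff))}")
    print(f"All close (atol=1e-5): {np.allclose(mediapipe_raw_results, tflite_raw_results.flatten(), atol=1e-5)}")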