TensorFlow Lite (TFLite) Python Inference Example with Quantization
# TFLite quantized inference example
#
# Based on:
# https://www.tensorflow.org/lite/performance/post_training_integer_quant
# https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/Tensor.QuantizationParams
import numpy as np
import tensorflow as tf
# Location of tflite model file (float32 or int8 quantized)
model_path = "my-model-file.lite"
# Processed features (copy from Edge Impulse project)
features = [
# <COPY FEATURES HERE!>
]
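# (features should be a flat list of numbers; the values shown in the
# Edge Impulse "Processed features" view can be pasted directly)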
# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path=model_path)
# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
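# Each entry in input_details/output_details is a dict with keys such as
# 'name', 'index', 'shape', 'dtype', and 'quantization' (scale, zero_point)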
# Allocate tensors
interpreter.allocate_tensors()
# Print the input and output details of the model
print()
print("Input details:")
print(input_details)
print()
print("Output details:")
print(output_details)
print()
# Convert features to NumPy array
np_features = np.array(features)
# If the expected input type is int8 (quantized model), rescale data
input_type = input_details[0]['dtype']
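# TFLite's affine quantization maps real values to int8 as
#   q = round(real / scale) + zero_point
# and real values are recovered with real = scale * (q - zero_point)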
if input_type == np.int8:
    input_scale, input_zero_point = input_details[0]['quantization']
    print("Input scale:", input_scale)
    print("Input zero point:", input_zero_point)
    print()
    np_features = (np_features / input_scale) + input_zero_point
    np_features = np.around(np_features)
# Convert features to NumPy array of expected type
np_features = np_features.astype(input_type)
# Add dimension to input sample (TFLite model expects (# samples, data))
np_features = np.expand_dims(np_features, axis=0)
# Create input tensor out of raw features
interpreter.set_tensor(input_details[0]['index'], np_features)
# Run inference
interpreter.invoke()
# Retrieve model output (output_details[0]['index'] is the index of the output tensor)
output = interpreter.get_tensor(output_details[0]['index'])
# If the output type is int8 (quantized model), rescale data
output_type = output_details[0]['dtype']
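# Dequantize using the inverse of the mapping above: real = scale * (q - zero_point)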
if output_type == np.int8:
    output_scale, output_zero_point = output_details[0]['quantization']
    print("Raw output scores:", output)
    print("Output scale:", output_scale)
    print("Output zero point:", output_zero_point)
    print()
    output = output_scale * (output.astype(np.float32) - output_zero_point)
# Print the results of inference
print("Inference output:", output)