Skip to content

Instantly share code, notes, and snippets.

@MaximKsh
Created January 20, 2020 13:26
Show Gist options
  • Save MaximKsh/baeb582fc73ed9ecfd5562b6f6684114 to your computer and use it in GitHub Desktop.
Save MaximKsh/baeb582fc73ed9ecfd5562b6f6684114 to your computer and use it in GitHub Desktop.
// Copyright 2019 The MediaPipe Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <android/log.h>

#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "mediapipe//framework/packet.h"
#include "mediapipe/calculators/util/detection_label_id_to_text_calculator.pb.h"
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/detection.pb.h"
#include "mediapipe/framework/port/status.h"
#include "mediapipe/util/resource_util.h"
#if defined(MEDIAPIPE_MOBILE)
#include "mediapipe/util/android/file/base/file.h"
#include "mediapipe/util/android/file/base/helpers.h"
#else
#include "mediapipe/framework/port/file_helpers.h"
#endif
namespace mediapipe {
// Takes a label map (from label IDs to names), and replaces the label IDs
// in Detection protos with label names. Note that the calculator makes a copy
// of the input detections. Consider using it only when the size of input
// detections is small.
//
// The label map is read in Open() from the file named by the label_map_path
// option: line N of that file (counting from 0) is the text for label ID N.
//
// Besides the detections stream (input index 0), GetContract() also configures
// an input stream tagged SECOND that accepts any packet type. Process() never
// reads that stream.
//
// Example usage:
// node {
//   calculator: "DetectionLabelIdToTextCalculator"
//   input_stream: "input_detections"
//   input_stream: "SECOND:second_input"
//   output_stream: "output_detections"
//   node_options: {
//     [type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
//       label_map_path: "labelmap.txt"
//     }
//   }
// }
class DetectionLabelIdToTextCalculator : public CalculatorBase {
public:
// Declares the packet types of the input and output streams.
static ::mediapipe::Status GetContract(CalculatorContract* cc);
// Loads the label map from the file named by the label_map_path option.
::mediapipe::Status Open(CalculatorContext* cc) override;
// Emits a copy of the input detections in which label IDs that have an entry
// in label_map_ are replaced by the corresponding label text.
::mediapipe::Status Process(CalculatorContext* cc) override;
private:
// Label ID -> label text. Filled by Open(); the key is the 0-based line number
// of the label within the label map file.
std::unordered_map<int, std::string> label_map_;
};
// Registers the calculator so that graph configs can refer to it by its class
// name in the `calculator:` field.
REGISTER_CALCULATOR(DetectionLabelIdToTextCalculator);
// Declares the stream types this calculator accepts and produces.
//
// Input 0 (untagged) and output 0 both carry std::vector<Detection>. An input
// stream tagged SECOND may additionally be connected and accepts any packet
// type. SECOND is only configured when the graph actually wires it, so that
// graphs connecting just the detections stream still pass contract
// validation.
//
// Returns OkStatus(); type mismatches are reported by the framework when it
// validates the graph against this contract.
::mediapipe::Status DetectionLabelIdToTextCalculator::GetContract(
    CalculatorContract* cc) {
  cc->Inputs().Index(0).Set<std::vector<Detection>>();
  if (cc->Inputs().HasTag("SECOND")) {
    cc->Inputs().Tag("SECOND").SetAny();
  }
  cc->Outputs().Index(0).Set<std::vector<Detection>>();
  return ::mediapipe::OkStatus();
}
// Prepares the calculator for processing.
//
// Sets a zero timestamp offset between inputs and outputs, logs "Open" to the
// Android log, and then fills label_map_ from the file named by the
// label_map_path option: the text of line N (0-based) becomes the label for
// ID N.
//
// Returns an error status if the resource path cannot be resolved or the file
// cannot be read; OkStatus() otherwise.
::mediapipe::Status DetectionLabelIdToTextCalculator::Open(
    CalculatorContext* cc) {
  cc->SetOffset(TimestampDiff(0));
  __android_log_print(ANDROID_LOG_ERROR, "MediapipeTest", "%s", "Open");

  const auto& calculator_options =
      cc->Options<::mediapipe::DetectionLabelIdToTextCalculatorOptions>();

  // Resolve the configured path to a readable file, then read the whole file
  // into memory.
  std::string resolved_path;
  ASSIGN_OR_RETURN(resolved_path,
                   PathToResourceAsFile(calculator_options.label_map_path()));
  std::string file_contents;
  MP_RETURN_IF_ERROR(file::GetContents(resolved_path, &file_contents));

  // One label per line; the line index doubles as the label ID.
  std::istringstream label_lines(file_contents);
  int next_id = 0;
  for (std::string label; std::getline(label_lines, label); ++next_id) {
    label_map_[next_id] = label;
  }
  return ::mediapipe::OkStatus();
}
// Converts label IDs to label text for one input timestamp.
//
// Logs "never print" to the Android log, then copies every Detection from
// input 0. For each label ID that has an entry in label_map_, the
// corresponding text is appended to the copy's `label` field. If at least one
// ID of a detection was translated, that detection's `label_id` field is
// cleared. The resulting vector is emitted on output 0 at the input timestamp.
//
// If input 0 carries no packet at this timestamp, nothing is emitted.
//
// Returns OkStatus().
::mediapipe::Status DetectionLabelIdToTextCalculator::Process(
    CalculatorContext* cc) {
  __android_log_print(ANDROID_LOG_ERROR, "MediapipeTest", "%s", "never print");

  // With more than one input stream, Process() can be invoked for a timestamp
  // at which only the other stream has a packet. Reading an empty input with
  // Get<>() is a fatal error in MediaPipe, so skip such timestamps.
  const auto& detections_stream = cc->Inputs().Index(0);
  if (detections_stream.IsEmpty()) {
    return ::mediapipe::OkStatus();
  }
  const auto& input_detections =
      detections_stream.Get<std::vector<Detection>>();

  std::vector<Detection> output_detections;
  output_detections.reserve(input_detections.size());
  for (const auto& input_detection : input_detections) {
    output_detections.push_back(input_detection);
    Detection& output_detection = output_detections.back();
    bool has_text_label = false;
    for (const int32 label_id : output_detection.label_id()) {
      // Single lookup: reuse the iterator instead of find() + operator[].
      const auto entry = label_map_.find(label_id);
      if (entry != label_map_.end()) {
        output_detection.add_label(entry->second);
        has_text_label = true;
      }
    }
    // Remove label_id field if text labels exist.
    if (has_text_label) {
      output_detection.clear_label_id();
    }
  }

  // Move the vector into the packet; it is not used after this point.
  cc->Outputs().Index(0).AddPacket(
      MakePacket<std::vector<Detection>>(std::move(output_detections))
          .At(cc->InputTimestamp()));
  return ::mediapipe::OkStatus();
}
} // namespace mediapipe
# MediaPipe graph that performs object detection with TensorFlow Lite on GPU.
# Used in the examples in
# mediapipe/examples/android/src/java/com/mediapipe/apps/objectdetectiongpu and
# mediapipe/examples/ios/objectdetectiongpu.
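# Images on GPU coming into and out of the graph ("input_video" and
# "output_video"). "second_input" is an additional graph input that is wired
# only to the SECOND input of the DetectionLabelIdToTextCalculator node below.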
input_stream: "input_video"
input_stream: "second_input"
output_stream: "output_video"
# Throttles the images flowing downstream for flow control. It passes through
# the very first incoming image unaltered, and waits for
# TfLiteTensorsToDetectionsCalculator downstream in the graph to finish
# generating the corresponding detections before it passes through another
# image. All images that come in while waiting are dropped, limiting the number
# of in-flight images between this calculator and
# TfLiteTensorsToDetectionsCalculator to 1. This prevents the nodes in between
# from queuing up incoming images and data excessively, which leads to increased
# latency and memory usage, unwanted in real-time mobile applications. It also
# eliminates unnecessary computation, e.g., a transformed image produced by
# ImageTransformationCalculator may get dropped downstream if the subsequent
# TfLiteConverterCalculator or TfLiteInferenceCalculator is still busy
# processing previous inputs.
node {
calculator: "FlowLimiterCalculator"
input_stream: "input_video"
input_stream: "FINISHED:detections"
input_stream_info: {
tag_index: "FINISHED"
back_edge: true
}
output_stream: "throttled_input_video"
}
# Transforms the input image on GPU to a 320x320 image. To scale the image, by
# default it uses the STRETCH scale mode that maps the entire input image to the
# entire transformed image. As a result, image aspect ratio may be changed and
# objects in the image may be deformed (stretched or squeezed), but the object
# detection model used in this graph is agnostic to that deformation.
node: {
calculator: "ImageTransformationCalculator"
input_stream: "IMAGE_GPU:throttled_input_video"
output_stream: "IMAGE_GPU:transformed_input_video"
node_options: {
[type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
output_width: 320
output_height: 320
}
}
}
# Converts the transformed input image on GPU into an image tensor stored as a
# TfLiteTensor.
node {
calculator: "TfLiteConverterCalculator"
input_stream: "IMAGE_GPU:transformed_input_video"
output_stream: "TENSORS_GPU:image_tensor"
}
# Runs a TensorFlow Lite model on GPU that takes an image tensor and outputs a
# vector of tensors representing, for instance, detection boxes/keypoints and
# scores.
node {
calculator: "TfLiteInferenceCalculator"
input_stream: "TENSORS_GPU:image_tensor"
output_stream: "TENSORS_GPU:detection_tensors"
node_options: {
[type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
model_path: "mediapipe/models/ssdlite_object_detection.tflite"
}
}
}
# Generates a single side packet containing a vector of SSD anchors based on
# the specification in the options.
node {
calculator: "SsdAnchorsCalculator"
output_side_packet: "anchors"
node_options: {
[type.googleapis.com/mediapipe.SsdAnchorsCalculatorOptions] {
num_layers: 6
min_scale: 0.2
max_scale: 0.95
input_size_height: 320
input_size_width: 320
anchor_offset_x: 0.5
anchor_offset_y: 0.5
strides: 16
strides: 32
strides: 64
strides: 128
strides: 256
strides: 512
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
reduce_boxes_in_lowest_layer: true
}
}
}
# Decodes the detection tensors generated by the TensorFlow Lite model, based on
# the SSD anchors and the specification in the options, into a vector of
# detections. Each detection describes a detected object.
node {
calculator: "TfLiteTensorsToDetectionsCalculator"
input_stream: "TENSORS_GPU:detection_tensors"
input_side_packet: "ANCHORS:anchors"
output_stream: "DETECTIONS:detections"
node_options: {
[type.googleapis.com/mediapipe.TfLiteTensorsToDetectionsCalculatorOptions] {
num_classes: 91
num_boxes: 2034
num_coords: 4
ignore_classes: 0
sigmoid_score: true
apply_exponential_on_box_size: true
x_scale: 10.0
y_scale: 10.0
h_scale: 5.0
w_scale: 5.0
min_score_thresh: 0.6
}
}
}
# Performs non-max suppression to remove excessive detections.
node {
calculator: "NonMaxSuppressionCalculator"
input_stream: "detections"
output_stream: "filtered_detections"
node_options: {
[type.googleapis.com/mediapipe.NonMaxSuppressionCalculatorOptions] {
min_suppression_threshold: 0.4
max_num_detections: 3
overlap_type: INTERSECTION_OVER_UNION
return_empty_detections: true
}
}
}
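# Maps detection label IDs to the corresponding label text. The label map is
# provided in the label_map_path option. The node also receives the graph-level
# "second_input" stream on its SECOND input.
# NOTE(review): with MediaPipe's default input synchronization this node
# presumably cannot run until "second_input" delivers a packet or a timestamp
# bound - confirm that the application feeds that stream.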
node {
calculator: "DetectionLabelIdToTextCalculator"
input_stream: "filtered_detections"
input_stream: "SECOND:second_input"
output_stream: "output_detections"
node_options: {
[type.googleapis.com/mediapipe.DetectionLabelIdToTextCalculatorOptions] {
label_map_path: "mediapipe/models/ssdlite_object_detection_labelmap.txt"
}
}
}
# Converts the detections to drawing primitives for annotation overlay.
node {
calculator: "DetectionsToRenderDataCalculator"
input_stream: "DETECTIONS:output_detections"
output_stream: "RENDER_DATA:render_data"
node_options: {
[type.googleapis.com/mediapipe.DetectionsToRenderDataCalculatorOptions] {
thickness: 4.0
color { r: 255 g: 0 b: 0 }
}
}
}
# Draws annotations and overlays them on top of the input images.
node {
calculator: "AnnotationOverlayCalculator"
input_stream: "INPUT_FRAME_GPU:throttled_input_video"
input_stream: "render_data"
output_stream: "OUTPUT_FRAME_GPU:output_video"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment