Lei Zhang (antiagainst)

antiagainst / adreno-perfcounter.c
Last active May 23, 2024 11:12
Adreno Perf Counter Queries
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#define ADRENO_IOC_TYPE 0x09             // KGSL ioctl magic number
#define ADRENO_PERFCOUNTER_GROUP_SP 0xA  // Shader processor (SP) counter group
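The preview cuts off after these defines. As a rough sketch of where such a query goes from here (not the gist's actual code), reading one shader-processor counter through the KGSL ioctl interface could look like the following; the struct layouts, the command number 0x3B, and the /dev/kgsl-3d0 device path are assumptions mirroring the kernel's msm_kgsl.h uapi and may differ across kernel versions.
// Hypothetical sketch, continuing from the includes and defines above.
// Mirrors the kernel's kgsl_perfcounter_read structures; the layouts and the
// 0x3B command number are assumptions, not copied from the gist.
struct adreno_perfcounter_read_counter {
  uint32_t group_id;
  uint32_t countable;
  uint64_t value;
};

struct adreno_perfcounter_read {
  struct adreno_perfcounter_read_counter *counters;
  uint32_t num_counters;
  uint32_t pad[2];
};

#define ADRENO_IOCTL_PERFCOUNTER_READ \
  _IOWR(ADRENO_IOC_TYPE, 0x3B, struct adreno_perfcounter_read)

int main(void) {
  int fd = open("/dev/kgsl-3d0", O_RDWR);  // Adreno KGSL device node
  if (fd < 0) { perror("open"); return 1; }

  // Read countable 0 from the SP group. A real client first reserves the
  // countable with the perfcounter "get" ioctl, which is elided here.
  struct adreno_perfcounter_read_counter counter = {
      .group_id = ADRENO_PERFCOUNTER_GROUP_SP, .countable = 0};
  struct adreno_perfcounter_read request = {.counters = &counter,
                                            .num_counters = 1};
  if (ioctl(fd, ADRENO_IOCTL_PERFCOUNTER_READ, &request) < 0) {
    perror("ioctl");
  } else {
    printf("SP countable 0 = %llu\n", (unsigned long long)counter.value);
  }
  close(fd);
  return 0;
}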
antiagainst / mali-print-info.c
Created July 8, 2021 21:31
Print Mali GPU Properties
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#define MALI_IOCTL_TYPE 0x80  // Mali kbase driver ioctl magic number
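The rest of this gist is also cut off. As a hedged sketch of the kind of query it performs (not the gist's actual code), fetching the raw GPU property blob from the kbase driver could look like the following; the struct layout, the command number 3, and the /dev/mali0 device path mirror the kbase uapi headers and are assumptions that may not match every driver version. A real client must also complete the driver's version-check/set-flags handshake first, which is elided here.
// Hypothetical sketch, continuing from the includes and define above.
// Mirrors the kbase get-gpuprops ioctl; the layout and command number are
// assumptions, not copied from the gist.
struct mali_ioctl_get_gpuprops {
  uint64_t buffer;  // user pointer the driver fills with property pairs
  uint32_t size;    // buffer size in bytes; 0 queries the required size
  uint32_t flags;
};

#define MALI_IOCTL_GET_GPUPROPS \
  _IOW(MALI_IOCTL_TYPE, 3, struct mali_ioctl_get_gpuprops)

int main(void) {
  int fd = open("/dev/mali0", O_RDWR);  // Mali kbase device node
  if (fd < 0) { perror("open"); return 1; }

  // First call with size == 0; the driver reports how large the blob is.
  struct mali_ioctl_get_gpuprops request = {0};
  int size = ioctl(fd, MALI_IOCTL_GET_GPUPROPS, &request);
  if (size < 0) { perror("ioctl"); close(fd); return 1; }

  uint8_t *blob = malloc(size);
  request.buffer = (uintptr_t)blob;
  request.size = (uint32_t)size;
  if (ioctl(fd, MALI_IOCTL_GET_GPUPROPS, &request) < 0) {
    perror("ioctl");
  } else {
    // The blob is a packed sequence of (key, value) pairs encoding the GPU
    // properties that the gist goes on to decode and print.
    printf("read %d bytes of GPU properties\n", size);
  }
  free(blob);
  close(fd);
  return 0;
}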
#!/bin/bash
# Assumes we are in IREE's source root directory.
export CC=$(which clang)
export CXX=$(which clang++)
pushd integrations/tensorflow
python ../../configure_bazel.py
bazel build iree_tf_compiler:importer-binaries
This file has been truncated.
// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass //----- //
builtin.module {
flow.variable @"__iree_flow___sm_node163__m.layer-1.kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x3x32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node169__m.layer-2.gamma" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node170__m.layer-2.beta" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node171__m.layer-2.moving_mean" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node172__m.layer-2.moving_variance" opaque<"_", "0xDEADBEEF"> : tensor<32xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow___sm_node181__m.layer-4.depthwise_kernel" opaque<"_", "0xDEADBEEF"> : tensor<3x3x32x1xf32> attributes {sym_visibility = "private"}
flow.variable
This file has been truncated.
// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass //----- //
builtin.module {
flow.variable @"__iree_flow_bert/embeddings/FakeLayerNorm/beta" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/embeddings/FakeLayerNorm/gamma" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/embeddings/embedding_transformation/bias" opaque<"_", "0xDEADBEEF"> : tensor<512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/embeddings/embedding_transformation/kernel" opaque<"_", "0xDEADBEEF"> : tensor<384x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/embeddings/position_embeddings" opaque<"_", "0xDEADBEEF"> : tensor<512x512xf32> attributes {sym_visibility = "private"}
flow.variable @"__iree_flow_bert/embeddings/token_type_embeddings" opaque<"_", "0xDEADBEEF"> : tensor<2x512xf32> attributes {sym_vis
This file has been truncated.
// -----// IR Dump After mlir::iree_compiler::IREE::ABI::WrapEntryPointsPass //----- //
module {
func @conv(%arg0: !hal.buffer_view, %arg1: !hal.buffer_view) -> !hal.buffer_view attributes {iree.abi.stub} {
%0 = hal.tensor.cast %arg0 : !hal.buffer_view -> tensor<1x225x225x3xf32>
%1 = hal.tensor.cast %arg1 : !hal.buffer_view -> tensor<3x3x3x32xf32>
%2 = call @_conv(%0, %1) : (tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%3 = hal.tensor.cast %2 : tensor<1x112x112x32xf32> -> !hal.buffer_view
return %3 : !hal.buffer_view
}
func private @_conv(%arg0: tensor<1x225x225x3xf32>, %arg1: tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32> {
// -----// IR Dump Before LinalgBufferize //----- //
func @UpSampling3D__2x8x8x8x3__f32__uniform_dispatch_32() {
%c3 = arith.constant 3 : index
%c16 = arith.constant 16 : index
%c0 = arith.constant 0 : index
%0 = hal.interface.binding.subspan @io::@s0b0_rw_external[%c0] : !flow.dispatch.tensor<readwrite:2x16x16x16x3xf32>
%1 = hal.interface.binding.subspan @io::@s0b1_ro_external[%c0] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32>
%2 = flow.dispatch.tensor.load %1, offsets = [], sizes = [], strides = [] : !flow.dispatch.tensor<readonly:2x16x16x8x3xf32> -> tensor<2x16x16x8x3xf32>
%3 = tensor.extract_slice %2[0, 0, 0, 0, 0] [2, 16, 16, 1, 3] [1, 1, 1, 1, 1] : tensor<2x16x16x8x3xf32> to tensor<2x16x16x3xf32>
%workgroup_size_x = hal.interface.workgroup.size[0] : index
antiagainst / stream-default.md
Created December 1, 2021 17:00
Stream as default (before vs. after)
// -----// IR Dump Before TopLevelSCFToCFG //----- //
func @conv_pad(%arg0: tensor<1x224x224x3xf32>, %arg1: tensor<3x3x3x32xf32>, %arg2: tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> {
%0 = mhlo.convolution(%arg0, %arg1) dim_numbers = [b, 0, 1, f]x[0, 1, i, o]->[b, 0, 1, f], window = {stride = [2, 2], pad = [[0, 1], [0, 1]], rhs_dilate = [1, 1]} {batch_group_count = 1 : i64, feature_group_count = 1 : i64} : (tensor<1x224x224x3xf32>, tensor<3x3x3x32xf32>) -> tensor<1x112x112x32xf32>
%1 = mhlo.subtract %0, %arg2 : tensor<1x112x112x32xf32>
return %1 : tensor<1x112x112x32xf32>
}
// -----// IR Dump Before SPIRVTile //----- //
func @conv_pad_dispatch_0() {
%cst = arith.constant 0.000000e+00 : f32