benvanik’s gists

benvanik / 0-input.mlir

Last active June 2, 2025 19:11

PR20855 IR examples

	// tools/test/iree-run-module-multi.mlir

	func.func public @multi_device_mul(
	// Input argument is resident on device_a (tooling defaults to the first device).
	%input_a: tensor<4xf32> {iree.abi.affinity = #hal.device.promise<@device_a>}
	) -> (
	// Output result is expected to be on device_a (though not required).
	tensor<4xf32> {iree.abi.affinity = #hal.device.promise<@device_a>}
	) {
	// Compute on device_a (input is there).

benvanik / build_kernels_c.sh

Last active November 13, 2024 16:19

hsa prototypes

	set -x

	~/src/iree-build/llvm-project/bin/clang \
	-x c -std=c23 \
	-target amdgcn-amd-amdhsa -march=gfx1100 \
	-nogpulib \
	-fgpu-rdc \
	-fno-short-wchar \
	-fno-ident \
	-Xclang -finclude-default-header \

benvanik / methods.mlir

Last active September 14, 2022 14:35

vmvx notes

	vm.import @vmvx.add.2d.f32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
	vm.import @vmvx.add.2d.i32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
	vm.import @vmvx.and.2d.i32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
	vm.import @vmvx.div.2d.f32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_st

benvanik / unidirectional_lstm.mlir

Created June 15, 2022 19:51

unidirectional_lstm.mlir

This file has been truncated, but you can view the full file.

	// -----// IR Dump After TopLevelSCFToCFG //----- //
	// Returns an i1 tensor holding the result of the less-than (LT) comparison
	// of %arg0 against %arg1. The remaining arguments (%arg2..%arg13) are not
	// referenced in the body.
	// NOTE(review): presumably the condition of a forward loop, judging by the
	// symbol name -- confirm against the caller.
	func.func private @ForwardLoopCond_gFAnjWGSoLs__.167(%arg0: tensor<i64>, %arg1: tensor<i64>, %arg2: tensor<40xf32>, %arg3: tensor<i64>, %arg4: tensor<74x40xf32>, %arg5: tensor<i64>, %arg6: tensor<1x10xf32>, %arg7: tensor<1x10xf32>, %arg8: tensor<5x1x64xf32>, %arg9: tensor<5x1x1xf32>, %arg10: tensor<5x1x1xf32>, %arg11: tensor<5xi64>, %arg12: tensor<5x1x10xf32>, %arg13: tensor<5x1x10xf32>) -> tensor<i1> {
	%0 = "mhlo.compare"(%arg0, %arg1) {comparison_direction = #mhlo<"comparison_direction LT">} : (tensor<i64>, tensor<i64>) -> tensor<i1>
	return %0 : tensor<i1>
	}

	// -----// IR Dump After MHLOToMHLOPreprocessing //----- //
	func.func private @ForwardLoopCond_gFAnjWGSoLs__.167(%arg0: tensor<i64>, %arg1: tensor<i64>, %arg2: tensor<40xf32>, %arg3: tensor<i64>, %arg4: tensor<74x40xf32>, %arg5: tensor<i64>, %arg6: tensor<1x10xf32>, %arg7: tensor<1x10xf32>, %arg8: tensor<5x1x64xf32>, %arg9: tensor<5x1x1xf32>, %arg10: tensor<5x1x1xf32>, %arg11: tensor<5xi64>, %arg

benvanik / launch.json

Created June 15, 2022 19:41

simple_mul.mlir

	{
	"name": "(gdb) iree-compile",
	"type": "cppdbg",
	"request": "launch",
	"preLaunchTask": "build-iree-compile",
	"program": "${command:cmake.buildDirectory}/tools/iree-compile",
	"args": [
	// "-iree-vm-bytecode-module-output-format=annotated-mlir-text",
	"-iree-vm-bytecode-source-listing=${workspaceFolder}/../iree-tmp/vm.mlir",
	"-iree-vm-emit-polyglot-zip=true",

benvanik / api_interfaces_cc.h

Created January 6, 2021 00:12

WIP api_interfaces_cc.h example for #iree/4369

	// Copyright 2020 Google LLC
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// https://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,

benvanik / 0-static-input.mlir

Last active December 15, 2020 23:53

tiled dispatch

	// Simple dispatch of static shapes.
	func @staticShapeDispatch(%arg0 : tensor<8x4xf32>) -> tensor<4x8xf32> {
	%x = constant 100 : index
	%y = constant 50 : index
	// %x, %y here are the workgroup counts along a 2D grid to dispatch; backends turn them into 3D XYZ.
	%0 = flow.dispatch.workgroups[%x, %y](%arg0) : (tensor<8x4xf32>) -> (tensor<4x8xf32>) = (
	// I/O are modeled in the region as ref arguments that have some special ops available.
	%arg : !flow.dispatch.input<8x4xf32>, %ret : !flow.dispatch.output<4x8xf32>
	) {
	// Loads a tensor from an input; can be tiled with offsets/sizes/strides.

benvanik / dispatch_workgroups.mlir

Last active December 11, 2020 20:44

	// RUN: iree-opt -allow-unregistered-dialect -split-input-file %s \| iree-opt -allow-unregistered-dialect -split-input-file \| IreeFileCheck %s

	func @workgroups(%arg0 : tensor<?x4xf32>, %arg1 : index) -> tensor<4x?xf32> {
	%x = constant 100 : index
	%y = constant 50 : index
	%0 = flow.dispatch.workgroups[%x, %y](%arg0, %arg1) : (tensor<?x4xf32>, index) -> (tensor<4x?xf32>) =
	(%arg0_capture : !flow.dispatch.input<?x4xf32>, %arg1_capture : index, %ret0 : !flow.dispatch.output<4x?xf32>) {

	// Query symbolic workgroup info:

benvanik / add.mlir

Last active November 23, 2020 12:53

TFLite -> IREE synthesis example


	//===----------------------------------------------------------------------===//
	// b = add(a, a)
	//===----------------------------------------------------------------------===//

	// NOTE: this represents what our tflite import flow should produce; the _
	// prefixed functions are all synthesized by us. We use the VM dialect in here
	// now because std has no list and other stuff. In a real flow we may have an
	// iree_tflite dialect that has pseudo ops for these things that then plug into
	// the VM conversion interface, or maybe we just emit them as-is at input

benvanik / iree-wasm.md

Last active November 20, 2020 08:05

IREE WebAssembly executable kernel manifesto

This is the shape of what we are talking about:

// Let's run a tile worth of work within a larger grid dispatch. That grid is
// defined *by us in the compiler* - it could, for example, be a grid of 1x1x1
// such that this function is called once. Or, if there was benefit, you could
// make it go wider (like how ruy fans out work to a threadpool). And you can
// emit the code to choose the grid size/shape at runtime based on anything you
// want. That's what IREE gives you today. This here is your executable kernel,
// equivalent to a CUDA kernel or compute shader.

Ben Vanik benvanik