Skip to content

Instantly share code, notes, and snippets.

@benvanik
benvanik / build_kernels_c.sh
Last active November 13, 2024 16:19
hsa prototypes
set -x
~/src/iree-build/llvm-project/bin/clang \
-x c -std=c23 \
-target amdgcn-amd-amdhsa -march=gfx1100 \
-nogpulib \
-fgpu-rdc \
-fno-short-wchar \
-fno-ident \
-Xclang -finclude-default-header \
@benvanik
benvanik / methods.mlir
Last active September 14, 2022 14:35
vmvx notes
// VM import declarations for 2D binary ops. Each import takes three buffers
// (lhs, rhs, out), each accompanied by an i64 offset and a 2-element i64
// strides tuple, followed by one 2-element i64 sizes tuple.
// NOTE(review): presumably element-wise ops over a 2D strided view, with the
// suffix naming the element type - confirm against the VMVX module implementation.
vm.import @vmvx.add.2d.f32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
vm.import @vmvx.add.2d.i32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
vm.import @vmvx.and.2d.i32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_strides : tuple<i64, i64>, %out_buffer : !vm.buffer, %out_offset : i64, %out_strides : tuple<i64, i64>, %sizes : tuple<i64, i64>)
vm.import @vmvx.div.2d.f32(%lhs_buffer : !vm.buffer, %lhs_offset : i64, %lhs_strides : tuple<i64, i64>, %rhs_buffer : !vm.buffer, %rhs_offset : i64, %rhs_st
@benvanik
benvanik / unidirectional_lstm.mlir
Created June 15, 2022 19:51
unidirectional_lstm.mlir
This file has been truncated, but you can view the full file.
// -----// IR Dump After TopLevelSCFToCFG //----- //
// Loop-condition function: returns a scalar i1 tensor holding the result of
// an LT comparison of %arg0 against %arg1. The remaining arguments
// (%arg2..%arg13) are accepted but not used in this body.
// NOTE(review): presumably they mirror the loop-carried values of the
// associated loop body - confirm against the caller.
func.func private @ForwardLoopCond_gFAnjWGSoLs__.167(%arg0: tensor<i64>, %arg1: tensor<i64>, %arg2: tensor<40xf32>, %arg3: tensor<i64>, %arg4: tensor<74x40xf32>, %arg5: tensor<i64>, %arg6: tensor<1x10xf32>, %arg7: tensor<1x10xf32>, %arg8: tensor<5x1x64xf32>, %arg9: tensor<5x1x1xf32>, %arg10: tensor<5x1x1xf32>, %arg11: tensor<5xi64>, %arg12: tensor<5x1x10xf32>, %arg13: tensor<5x1x10xf32>) -> tensor<i1> {
// %0 = (%arg0 < %arg1), both operands being scalar i64 tensors.
%0 = "mhlo.compare"(%arg0, %arg1) {comparison_direction = #mhlo<"comparison_direction LT">} : (tensor<i64>, tensor<i64>) -> tensor<i1>
return %0 : tensor<i1>
}
// -----// IR Dump After MHLOToMHLOPreprocessing //----- //
func.func private @ForwardLoopCond_gFAnjWGSoLs__.167(%arg0: tensor<i64>, %arg1: tensor<i64>, %arg2: tensor<40xf32>, %arg3: tensor<i64>, %arg4: tensor<74x40xf32>, %arg5: tensor<i64>, %arg6: tensor<1x10xf32>, %arg7: tensor<1x10xf32>, %arg8: tensor<5x1x64xf32>, %arg9: tensor<5x1x1xf32>, %arg10: tensor<5x1x1xf32>, %arg11: tensor<5xi64>, %arg
@benvanik
benvanik / launch.json
Created June 15, 2022 19:41
simple_mul.mlir
{
"name": "(gdb) iree-compile",
"type": "cppdbg",
"request": "launch",
"preLaunchTask": "build-iree-compile",
"program": "${command:cmake.buildDirectory}/tools/iree-compile",
"args": [
// "-iree-vm-bytecode-module-output-format=annotated-mlir-text",
"-iree-vm-bytecode-source-listing=${workspaceFolder}/../iree-tmp/vm.mlir",
"-iree-vm-emit-polyglot-zip=true",
@benvanik
benvanik / api_interfaces_cc.h
Created January 6, 2021 00:12
WIP api_interfaces_cc.h example for #iree/4369
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
@benvanik
benvanik / 0-static-input.mlir
Last active December 15, 2020 23:53
tiled dispatch
// Simple dispatch of static shapes.
func @staticShapeDispatch(%arg0 : tensor<8x4xf32>) -> tensor<4x8xf32> {
%x = constant 100 : index
%y = constant 50 : index
// %x, %y here are the workgroup counts along a 2D grid to dispatch; backends turn them into 3D XYZ.
%0 = flow.dispatch.workgroups[%x, %y](%arg0) : (tensor<8x4xf32>) -> (tensor<4x8xf32>) = (
// I/O are modeled in the region as ref arguments that have some special ops available.
%arg : !flow.dispatch.input<8x4xf32>, %ret : !flow.dispatch.output<4x8xf32>
) {
// Loads a tensor from an input; can be tiled with offsets/sizes/strides.
// RUN: iree-opt -allow-unregistered-dialect -split-input-file %s | iree-opt -allow-unregistered-dialect -split-input-file | IreeFileCheck %s
func @workgroups(%arg0 : tensor<?x4xf32>, %arg1 : index) -> tensor<4x?xf32> {
%x = constant 100 : index
%y = constant 50 : index
%0 = flow.dispatch.workgroups[%x, %y](%arg0, %arg1) : (tensor<?x4xf32>, index) -> (tensor<4x?xf32>) =
(%arg0_capture : !flow.dispatch.input<?x4xf32>, %arg1_capture : index, %ret0 : !flow.dispatch.output<4x?xf32>) {
// Query symbolic workgroup info:
@benvanik
benvanik / add.mlir
Last active November 23, 2020 12:53
TFLite -> IREE synthesis example
//===----------------------------------------------------------------------===//
// b = add(a, a)
//===----------------------------------------------------------------------===//
// NOTE: this represents what our tflite import flow should produce; the _
// prefixed functions are all synthesized by us. We use the VM dialect in here
// now because std has no list and other stuff. In a real flow we may have an
// iree_tflite dialect that has pseudo ops for these things that then plug into
// the VM conversion interface, or maybe we just emit them as-is at input
@benvanik
benvanik / iree-wasm.md
Last active November 20, 2020 08:05
IREE WebAssembly executable kernel manifesto

This is the shape of what we are talking about:

// Let's run a tile worth of work within a larger grid dispatch. That grid is
// defined *by us in the compiler* - it could, for example, be a grid of 1x1x1
// such that this function is called once. Or, if there was benefit, you could
// make it go wider (like how ruy fans out work to a threadpool). And you can
// emit the code to choose the grid size/shape at runtime based on anything you
// want. That's what IREE gives you today. This here is your executable kernel,
// equivalent to a CUDA kernel or compute shader.
Ref Val Package PosX PosY Rot Side
C3 1uF/16V C_0402_1005Metric -46.872500 -5.840000 180.000000 bottom
C5 0.01uF C_0201_0603Metric -20.400000 -15.455000 90.000000 bottom
C6 0.1uF C_0201_0603Metric -20.500000 -13.850000 -90.000000 bottom
C7 0.01uF C_0201_0603Metric -17.700000 -15.050000 0.000000 bottom
C8 0.1uF C_0201_0603Metric -19.305000 -15.050000 180.000000 bottom
C11 0.01uF C_0201_0603Metric -20.100000 -18.750000 90.000000 bottom
C12 0.1uF C_0201_0603Metric -20.900000 -18.750000 90.000000 bottom
C13 0.01uF C_0201_0603Metric -18.500000 -19.600000 90.000000 bottom
C14 0.1uF C_0201_0603Metric -22.500000 -15.000000 180.000000 bottom