Prashant Kumar (pashu123)
func.func @matmul_broad_dispatch_2_batch_mmt4d_DxDx540x3200x16x16x1_f32xf16xf32() attributes {translation_info = #iree_codegen.translation_info<Mmt4dTilingExpert>} {
  %c1 = arith.constant 1 : index
  %c3200 = arith.constant 3200 : index
  %c540 = arith.constant 540 : index
  %c55296000 = arith.constant 55296000 : index
  %c0 = arith.constant 0 : index
  %c32_i64 = arith.constant 32 : i64
  %cst = arith.constant 0.000000e+00 : f32
  %0 = hal.interface.constant.load[0] : i32
  %1 = hal.interface.constant.load[1] : i32
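For orientation, the batch-mmt4d contraction this dispatch tiles can be written as a single einsum. A minimal NumPy sketch, assuming the usual linalg.mmt4d layout (LHS tiles M x K x m0 x k0, RHS tiles N x K x n0 x k0 with the inner tile pre-transposed, accumulator M x N x m0 x n0) and the dispatch's m0 = n0 = 16, k0 = 1 tiles with f32 accumulation of an f32 x f16 product; the outer sizes below are placeholders, not taken from the dump:

import numpy as np

def mmt4d(A, B, C):
    # C[m, n, m0, n0] += sum over k, k0 of A[m, k, m0, k0] * B[n, k, n0, k0]
    return C + np.einsum('mkac,nkbc->mnab', A, B.astype(np.float32))

M, N, K = 4, 5, 6                                   # placeholder outer sizes
A = np.random.rand(M, K, 16, 1).astype(np.float32)  # f32 LHS
B = np.random.rand(N, K, 16, 1).astype(np.float16)  # f16 RHS
C = np.zeros((M, N, 16, 16), dtype=np.float32)      # f32 accumulator
print(mmt4d(A, B, C).shape)                         # (4, 5, 16, 16)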
func.func @pad_and_pack_static(%input: tensor<13x15xf32>, %output: tensor<2x8x8x2xf32>, %pad: f32) -> tensor<2x8x8x2xf32> {
  %0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
  return %0 : tensor<2x8x8x2xf32>
}
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %0 = transform.structured.match ops{["tensor.pack"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    %1, %loops:2 = transform.structured.tile_using_for %0 tile_sizes [2, 4] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}
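For reference, the pack op in the test case above pads 13x15 up to 16x16 and rearranges it into a 2x8 grid of 8x2 tiles. A NumPy sketch of that semantics, based on my reading of tensor.pack with inner_dims_pos = [0, 1] and inner_tiles = [8, 2] (the tiling transform itself is not modeled):

import numpy as np

def pack_with_padding(x, tiles, pad_value):
    # result[o0, o1, i0, i1] == padded[o0 * t0 + i0, o1 * t1 + i1]
    t0, t1 = tiles
    p0 = -x.shape[0] % t0
    p1 = -x.shape[1] % t1
    padded = np.pad(x, ((0, p0), (0, p1)), constant_values=pad_value)
    o0, o1 = padded.shape[0] // t0, padded.shape[1] // t1
    return padded.reshape(o0, t0, o1, t1).transpose(0, 2, 1, 3)

x = np.arange(13 * 15, dtype=np.float32).reshape(13, 15)
print(pack_with_padding(x, (8, 2), 0.0).shape)  # (2, 8, 8, 2)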
-- Read the docs: https://www.lunarvim.org/docs/configuration
-- Video Tutorials: https://www.youtube.com/watch?v=sFA9kX-Ud_c&list=PLhoH5vyxr6QqGu0i7tt_XoVK9v-KvZ3m6
-- Forum: https://www.reddit.com/r/lunarvim/
-- Discord: https://discord.com/invite/Xb9B4Ny
--
--
lvim.colorscheme = "lunar"
lvim.format_on_save.enabled = false
func.func @img2col(%arg0: tensor<128x1026x1026xf32>) -> tensor<128x3x3x1024x1024xbf16> {
  %0 = tensor.empty() : tensor<128x3x3x1024x1024xbf16>
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %cst = arith.constant 0.000000e+00 : f32
  %c128 = arith.constant 128 : index
  %c1024 = arith.constant 1024 : index
  %1 = scf.for %arg1 = %c0 to %c128 step %c1 iter_args(%arg2 = %0) -> (tensor<128x3x3x1024x1024xbf16>) {
    %2 = scf.for %arg3 = %c0 to %c1024 step %c1 iter_args(%arg4 = %arg2) -> (tensor<128x3x3x1024x1024xbf16>) {
      %3 = scf.for %arg5 = %c0 to %c1024 step %c1 iter_args(%arg6 = %arg4) -> (tensor<128x3x3x1024x1024xbf16>) {
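The loop nest is cut off above, but the gather it implements can be sketched directly. A NumPy version, assuming the innermost body copies input[b, h + kh, w + kw] into result[b, kh, kw, h, w] (the standard img2col for a 3x3 window), with the f32 to bf16 cast elided since NumPy has no native bfloat16:

import numpy as np

def img2col(x, k=3):
    b, hp, wp = x.shape                    # e.g. (128, 1026, 1026)
    h, w = hp - k + 1, wp - k + 1          # output spatial dims, here (1024, 1024)
    out = np.empty((b, k, k, h, w), dtype=x.dtype)
    for kh in range(k):
        for kw in range(k):
            out[:, kh, kw] = x[:, kh:kh + h, kw:kw + w]  # shifted-window copy
    return out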
import argparse
import re

parser = argparse.ArgumentParser(description='Convert parameter data type')
parser.add_argument('mlir', type=str, help='MLIR file where all parameters are mentioned')
parser.add_argument('dtype', type=str, help='Required data type of parameters')
parser.add_argument('irpa', type=str, help='Destination IRPA file')
args = parser.parse_args()
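Continuing the snippet, a quick check of the positional interface this sets up, using parse_args with an explicit argument list; the file names are placeholders, not taken from the gist:

demo = parser.parse_args(['model.mlir', 'f16', 'out.irpa'])
print(demo.mlir, demo.dtype, demo.irpa)  # model.mlir f16 out.irpa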
func.func @torch_add(%arg0: !torch.vtensor<[1,1,?,?],i1>, %arg1: !torch.vtensor<[4,1,1,?],i1>) -> !torch.vtensor<[4,1,?,?],i1> {
  %int1 = torch.constant.int 1
  %2 = torch.aten.add.Tensor %arg0, %arg1, %int1 : !torch.vtensor<[1,1,?,?],i1>, !torch.vtensor<[4,1,1,?],i1>, !torch.int -> !torch.vtensor<[4,1,?,?],i1>
  return %2 : !torch.vtensor<[4,1,?,?],i1>
}
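The op above is a pure boolean broadcast: [1,1,?,?] against [4,1,1,?] yields [4,1,?,?]. A PyTorch check of the shape inference, with the dynamic dims bound to placeholder sizes 5 and 7:

import torch

a = torch.zeros(1, 1, 5, 7, dtype=torch.bool)  # [1,1,?,?] with ? = 5, 7
b = torch.ones(4, 1, 1, 7, dtype=torch.bool)   # [4,1,1,?] with ? = 7
out = torch.add(a, b)                          # alpha operand (%int1) defaults to 1
print(out.shape)                               # torch.Size([4, 1, 5, 7])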
This file has been truncated.
// -----// IR Dump After AssignTargetDevicesPass (iree-hal-assign-target-devices) //----- //
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 16 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>
#device_target_local = #hal.device.target<"local", [#executable_target_embedded_elf_x86_64_]>
module attributes {hal.device.targets = [#device_target_local]} {
func.func @torch.prims.convert_element_type$fold(%arg0: !torch.vtensor<[4,?,32,100],f32>, %arg1: !torch.vtensor<[4,?],si64>) -> !torch.vtensor<[4,?,32,50,2],f32> {
  %int4 = torch.constant.int 4
  %int32 = torch.constant.int 32
  %int-1 = torch.constant.int -1
  %int2 = torch.constant.int 2
  %int1 = torch.constant.int 1
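The constants above (4, 32, -1, 2) spell out the target view shape: the trailing dimension of 100 is split into 50 pairs, as in rotary-embedding style real/imag splitting. A minimal PyTorch illustration, with the dynamic dim bound to a placeholder size of 7:

import torch

x = torch.randn(4, 7, 32, 100)   # [4,?,32,100] with ? = 7
y = x.view(4, 7, 32, -1, 2)      # shape constants 4, 32, -1, 2 from the IR
print(y.shape)                   # torch.Size([4, 7, 32, 50, 2])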
[-1.0, -0.9591836734693877, -0.9183673469387755, -0.8775510204081632, -0.8367346938775511, -0.7959183673469388, -0.7551020408163265, -0.7142857142857143, -0.6734693877551021, -0.6326530612244898, -0.5918367346938775, -0.5510204081632653, -0.5102040816326531, -0.4693877551020408, -0.4285714285714286, -0.3877551020408163, -0.34693877551020413, -0.30612244897959184, -0.26530612244897955, -0.22448979591836737, -0.18367346938775508, -0.1428571428571429, -0.10204081632653061, -0.061224489795918324, -0.020408163265306145, 0.020408163265306145, 0.061224489795918435, 0.1020408163265305, 0.1428571428571428, 0.18367346938775508, 0.22448979591836737, 0.26530612244897966, 0.30612244897959173, 0.346938775510204, 0.3877551020408163, 0.4285714285714286, 0.4693877551020409, 0.510204081632653, 0.5510204081632653, 0.5918367346938775, 0.6326530612244898, 0.6734693877551021, 0.7142857142857142, 0.7551020408163265, 0.7959183673469388, 0.8367346938775511, 0.8775510204081634, 0.9183673469387754, 0.9591836734693877, 1.0]
Actual: 3.14
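The 50 inputs above form an even grid over [-1, 1]; my guess at their generator, which the gist itself does not show:

import numpy as np

grid = np.linspace(-1.0, 1.0, 50)  # reproduces the 50 sample points listed above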
import math

def fma(a, b, c):
    # Fused multiply-add emulation; plain a * b + c rounds the product before
    # the add, whereas math.fma (Python 3.13+) keeps it unrounded.
    return a * b + c

def asin_core(a):
    # Polynomial core of an arcsin approximation, evaluated in powers of a*a;
    # r and t are the leading polynomial coefficients.
    s = a * a
    q = s * s
    r = 5.5579749017470502e-2
    t = -6.2027913464120114e-2