Skip to content

Instantly share code, notes, and snippets.

View pashu123's full-sized avatar
😇
Working from home

Prashant Kumar pashu123

😇
Working from home
View GitHub Profile
This file has been truncated, but you can view the full file.
Args: iree-opt --pass-pipeline=builtin.module(func.func(iree-codegen-tile-and-distribute-to-workgroups-using-forall-op, cse)) --mlir-print-local-scope --split-input-file before_scf.mlir --debug
ImplicitTypeIDRegistry::lookupOrInsert(mlir::chlo::ChloDialect)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::stablehlo::StablehloDialect)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::vhlo::VhloDialect)
Load new dialect in Context builtin
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::TypedAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ElementsAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DistinctAttr)
This file has been truncated, but you can view the full file.
Args: iree-opt --pass-pipeline=builtin.module(func.func(iree-codegen-tile-and-distribute-to-workgroups-using-forall-op, cse)) --mlir-print-local-scope --split-input-file before_scf.mlir --debug
ImplicitTypeIDRegistry::lookupOrInsert(mlir::chlo::ChloDialect)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::stablehlo::StablehloDialect)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::vhlo::VhloDialect)
Load new dialect in Context builtin
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedType)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::TypedAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ElementsAttr)
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DistinctAttr)
func.func @time_out(%arg0: tensor<1x1x288x8x4xf32>, %arg1: tensor<1152xf32>) -> tensor<1x1x1152xf32> {
%c0 = arith.constant 0 : index
%5 = tensor.empty() : tensor<1x1x1152xf32>
%unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32>
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1, %unpack : tensor<1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) {
^bb0(%in: f32, %in_0: f32, %out: f32):
%7 = arith.addf %in, %in_0 : f32
linalg.yield %7 : f32
} -> tensor<1x1x1152xf32>
return %6: tensor<1x1x1152xf32>
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
#map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
module {
func.func @test_dispatch(%arg0: tensor<1x2x128xf32>, %arg1: tensor<1x2x48x30x30xf32>, %arg2: tensor<2x128x48x5x5xf32>) -> (tensor<1x2x128x26x26xf32>, tensor<1x2x128x26x26xf32>) {
%cst = arith.constant 0.000000e+00 : f32
%c40896 = arith.constant 40896 : index
%c3720640 = arith.constant 3720640 : index
%c259584 = arith.constant 259584 : index
%c605184 = arith.constant 605184 : index
func.func @scatter_dispatch_0_scatter_2x2xi32_dispatch_tensor_store() attributes {translation_info = #iree_codegen.translation_info<CPUDefault>} {
%c0 = arith.constant 0 : index
%0:3 = util.assume.int
%c0<umin = 0, umax = 0>,
%c0<umin = 0, umax = 0>,
%c0<umin = 0, umax = 0>
: index, index, index
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%0#0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<2xi32>>
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%0#1) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<read
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
module {
func.func @scatter() -> tensor<2x2xi32> {
%0 = util.unfoldable_constant dense<0> : tensor<2x2xi32>
%1 = util.unfoldable_constant dense<1> : tensor<2xi32>
%2 = util.unfoldable_constant dense<[[0, 0], [1, 1]]> : tensor<2x2xi32>
%3 = iree_linalg_ext.scatter dimension_map = [0, 1] unique_indices(true) ins(%1, %2 : tensor<2xi32>, tensor<2x2xi32>) outs(%0 : tensor<2x2xi32>) {
^bb0(%arg0: i32, %arg1: i32):
iree_linalg_ext.yield %arg0 : i32
} -> tensor<2x2xi32>
-----BEGIN PGP PUBLIC KEY BLOCK-----
mQINBGcOLi4BEACjaOG2i+1QKS/qVZE9O1Kc9UvlMuL4vL/YADcl9WzWZfC+jPcN
1mPO09qhAyO6CmB9jcmqZP7Utow+6ym/mZGFeHWaFC7KOojxUlPGvjFPHAFfKpQ8
cN+QGkmhDhcj0DD/niXuzgf3LHnssJp8S8LvrHCz/q/7oA1Ou5gumYplpJdCPXqV
rKXr3kxhuBufga9kQbjgODcAeG79eA+Gla80wDC+WvsZh01xN+kNkqkAJdnC72fz
nNlCXldQbyrofqGWrv3lXcQ1EQptqliseOT8nvLgawkomTYRIah9T5rMZ3PqSFpk
AcgKakHagsykMa5RqdmNKLrUaMm2WvHRloeL+ql/PqAyxQBbwgSoUcajt46MC2Mg
jqW9rsZTDD+jpMy8hbYBueoCBa2/4PRZU1rxHC1rP98WCvFK0eq9pYcR4nxQbqBL
dqAo/9r1aF47QRyfvYQO5fu2syBPooQAs0UA5qkwfsnVF6UQ3AMbFNLvl7Kbrp1c
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) ('builtin.module' operation) //----- //
#map = affine_map<(d0) -> (d0)>
module {
func.func @philox_i32_dispatch_0_elementwise_broadcast_2_i32(%arg0: tensor<4xi32>) -> (tensor<2xi32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) {
%c0 = arith.constant 0 : index
%c3 = arith.constant 3 : index
%c2 = arith.constant 2 : index
%c3528531795_i64 = arith.constant 3528531795 : i64
%c3449720151_i64 = arith.constant 3449720151 : i64
%c-1640531527_i32 = arith.constant -1640531527 : i32
This file has been truncated, but you can view the full file.
// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) ('builtin.module' operation) //----- //
#map = affine_map<(d0) -> (d0)>
module {
func.func @philox_i32_dispatch_0_elementwise_broadcast_2_i32(%arg0: tensor<4xi32>) -> (tensor<2xi32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) {
%c0 = arith.constant 0 : index
%c3 = arith.constant 3 : index
%c2 = arith.constant 2 : index
%c3528531795_i64 = arith.constant 3528531795 : i64
%c3449720151_i64 = arith.constant 3449720151 : i64
%c-1640531527_i32 = arith.constant -1640531527 : i32
// -----// IR Dump After LLVMCPUSelectLoweringStrategyPass (iree-llvmcpu-select-lowering-strategy) //----- //
#config = #iree_codegen.lowering_config<tile_sizes = [[16, 64], [8, 32], [0, 0], [0, 0]]>
#config1 = #iree_codegen.lowering_config<tile_sizes = [[16, 64, 0], [16, 64, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
#map = affine_map<(d0, d1) -> (d0, d1)>
#map1 = affine_map<(d0, d1) -> (d1)>
#pipeline_layout = #hal.pipeline.layout<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>
#translation = #iree_codegen.t