pashu123’s gists

pashu123 / debug.txt

Created October 23, 2024 13:26

This file has been truncated, but you can view the full file.

	Args: iree-opt --pass-pipeline=builtin.module(func.func(iree-codegen-tile-and-distribute-to-workgroups-using-forall-op, cse)) --mlir-print-local-scope --split-input-file before_scf.mlir --debug
	ImplicitTypeIDRegistry::lookupOrInsert(mlir::chlo::ChloDialect)
	ImplicitTypeIDRegistry::lookupOrInsert(mlir::stablehlo::StablehloDialect)
	ImplicitTypeIDRegistry::lookupOrInsert(mlir::vhlo::VhloDialect)
	Load new dialect in Context builtin
	ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedType)
	ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface)
	ImplicitTypeIDRegistry::lookupOrInsert(mlir::TypedAttr)
	ImplicitTypeIDRegistry::lookupOrInsert(mlir::ElementsAttr)
	ImplicitTypeIDRegistry::lookupOrInsert(mlir::DistinctAttr)

pashu123 / unltd.mlir

Created October 22, 2024 17:29

	func.func @time_out(%arg0: tensor<1x1x288x8x4xf32>, %arg1: tensor<1152xf32>) -> tensor<1x1x1152xf32> {
	%c0 = arith.constant 0 : index
	%5 = tensor.empty() : tensor<1x1x1152xf32>
	%unpack = tensor.unpack %arg0 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32>
	%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg1, %unpack : tensor<1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) {
	^bb0(%in: f32, %in_0: f32, %out: f32):
	%7 = arith.addf %in, %in_0 : f32
	linalg.yield %7 : f32
	} -> tensor<1x1x1152xf32>
	return %6: tensor<1x1x1152xf32>

pashu123 / new.mlir

Created October 22, 2024 14:13

This file has been truncated, but you can view the full file.

	// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
	#map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
	#map1 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
	module {
	func.func @test_dispatch(%arg0: tensor<1x2x128xf32>, %arg1: tensor<1x2x48x30x30xf32>, %arg2: tensor<2x128x48x5x5xf32>) -> (tensor<1x2x128x26x26xf32>, tensor<1x2x128x26x26xf32>) {
	%cst = arith.constant 0.000000e+00 : f32
	%c40896 = arith.constant 40896 : index
	%c3720640 = arith.constant 3720640 : index
	%c259584 = arith.constant 259584 : index
	%c605184 = arith.constant 605184 : index

pashu123 / buff_err.mlir

Created October 16, 2024 14:56

	func.func @scatter_dispatch_0_scatter_2x2xi32_dispatch_tensor_store() attributes {translation_info = #iree_codegen.translation_info<CPUDefault>} {
	%c0 = arith.constant 0 : index
	%0:3 = util.assume.int
	%c0<umin = 0, umax = 0>,
	%c0<umin = 0, umax = 0>,
	%c0<umin = 0, umax = 0>
	: index, index, index
	%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%0#0) flags("ReadOnly\|Indirect") : !flow.dispatch.tensor<readonly:tensor<2xi32>>
	%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%0#1) flags("ReadOnly\|Indirect") : !flow.dispatch.tensor<read

pashu123 / new.txt

Created October 15, 2024 16:06

This file has been truncated, but you can view the full file.

	// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) //----- //
	module {
	func.func @scatter() -> tensor<2x2xi32> {
	%0 = util.unfoldable_constant dense<0> : tensor<2x2xi32>
	%1 = util.unfoldable_constant dense<1> : tensor<2xi32>
	%2 = util.unfoldable_constant dense<[[0, 0], [1, 1]]> : tensor<2x2xi32>
	%3 = iree_linalg_ext.scatter dimension_map = [0, 1] unique_indices(true) ins(%1, %2 : tensor<2xi32>, tensor<2x2xi32>) outs(%0 : tensor<2x2xi32>) {
	^bb0(%arg0: i32, %arg1: i32):
	iree_linalg_ext.yield %arg0 : i32
	} -> tensor<2x2xi32>

pashu123 / file.txt

Created October 15, 2024 09:02

	-----BEGIN PGP PUBLIC KEY BLOCK-----

	mQINBGcOLi4BEACjaOG2i+1QKS/qVZE9O1Kc9UvlMuL4vL/YADcl9WzWZfC+jPcN
	1mPO09qhAyO6CmB9jcmqZP7Utow+6ym/mZGFeHWaFC7KOojxUlPGvjFPHAFfKpQ8
	cN+QGkmhDhcj0DD/niXuzgf3LHnssJp8S8LvrHCz/q/7oA1Ou5gumYplpJdCPXqV
	rKXr3kxhuBufga9kQbjgODcAeG79eA+Gla80wDC+WvsZh01xN+kNkqkAJdnC72fz
	nNlCXldQbyrofqGWrv3lXcQ1EQptqliseOT8nvLgawkomTYRIah9T5rMZ3PqSFpk
	AcgKakHagsykMa5RqdmNKLrUaMm2WvHRloeL+ql/PqAyxQBbwgSoUcajt46MC2Mg
	jqW9rsZTDD+jpMy8hbYBueoCBa2/4PRZU1rxHC1rP98WCvFK0eq9pYcR4nxQbqBL
	dqAo/9r1aF47QRyfvYQO5fu2syBPooQAs0UA5qkwfsnVF6UQ3AMbFNLvl7Kbrp1c

pashu123 / old_philox.txt

Created October 10, 2024 16:15

This file has been truncated, but you can view the full file.

	// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) ('builtin.module' operation) //----- //
	#map = affine_map<(d0) -> (d0)>
	module {
	func.func @philox_i32_dispatch_0_elementwise_broadcast_2_i32(%arg0: tensor<4xi32>) -> (tensor<2xi32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) {
	%c0 = arith.constant 0 : index
	%c3 = arith.constant 3 : index
	%c2 = arith.constant 2 : index
	%c3528531795_i64 = arith.constant 3528531795 : i64
	%c3449720151_i64 = arith.constant 3449720151 : i64
	%c-1640531527_i32 = arith.constant -1640531527 : i32

pashu123 / new_philox.txt

Created October 10, 2024 16:05

This file has been truncated, but you can view the full file.

	// -----// IR Dump After AutoInputConversionPipelinePass (iree-auto-input-conversion) ('builtin.module' operation) //----- //
	#map = affine_map<(d0) -> (d0)>
	module {
	func.func @philox_i32_dispatch_0_elementwise_broadcast_2_i32(%arg0: tensor<4xi32>) -> (tensor<2xi32>, tensor<2xi32>, tensor<2xi32>, tensor<2xi32>) {
	%c0 = arith.constant 0 : index
	%c3 = arith.constant 3 : index
	%c2 = arith.constant 2 : index
	%c3528531795_i64 = arith.constant 3528531795 : i64
	%c3449720151_i64 = arith.constant 3449720151 : i64
	%c-1640531527_i32 = arith.constant -1640531527 : i32

pashu123 / xyz.txt

Created October 7, 2024 05:58

	// -----// IR Dump After LLVMCPUSelectLoweringStrategyPass (iree-llvmcpu-select-lowering-strategy) //----- //
	#config = #iree_codegen.lowering_config<tile_sizes = [[16, 64], [8, 32], [0, 0], [0, 0]]>
	#config1 = #iree_codegen.lowering_config<tile_sizes = [[16, 64, 0], [16, 64, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
	#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "generic", cpu_features = "+avx512f", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128", native_vector_size = 64 : index, target_triple = "x86_64-none-elf"}>
	#map = affine_map<(d0, d1) -> (d0, d1)>
	#map1 = affine_map<(d0, d1) -> (d1)>
	#pipeline_layout = #hal.pipeline.layout<bindings = [#hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>, #hal.pipeline.binding<storage_buffer>]>
	#translation = #iree_codegen.t

pashu123 / inp.mlir

Created October 4, 2024 16:10

	func.func @prefill_bs4$async_dispatch_20_pack_f16() attributes {translation_info = #iree_codegen.translation_info<CPUDataTiling>} {
	%c32_i64 = arith.constant 32 : i64
	%cst = arith.constant 0.000000e+00 : f16
	%0 = hal.interface.constant.load layout(<constants = 8, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
	%1 = hal.interface.constant.load layout(<constants = 8, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
	%2 = hal.interface.constant.load layout(<constants = 8, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(2) : i32
	%3 = hal.interface.constant.load layout(<constants = 8, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buff

	func.func @prefill_bs4$async_dispatch_20_pack_f16() attributes {translation_info = #iree_codegen.translation_info<CPUDataTiling>} {
	%c32_i64 = arith.constant 32 : i64
	%cst = arith.constant 0.000000e+00 : f16
	%0 = hal.interface.constant.load layout(<constants = 8, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(0) : i32
	%1 = hal.interface.constant.load layout(<constants = 8, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(1) : i32
	%2 = hal.interface.constant.load layout(<constants = 8, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) ordinal(2) : i32
	%3 = hal.interface.constant.load layout(<constants = 8, bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly\|Indirect">, #hal.pipeline.binding<storage_buff

Prashant Kumar pashu123