Created
October 23, 2024 13:39
-
-
Save pashu123/28f1bc4a1eccd7a424efd4dcde8cf411 to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Args: iree-opt --pass-pipeline=builtin.module(func.func(iree-codegen-tile-and-distribute-to-workgroups-using-forall-op, cse)) --mlir-print-local-scope --split-input-file before_scf.mlir --debug | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::chlo::ChloDialect) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::stablehlo::StablehloDialect) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::vhlo::VhloDialect) | |
Load new dialect in Context builtin | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemRefLayoutAttrInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::TypedAttr) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ElementsAttr) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DistinctAttr) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::BytecodeOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionKindInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConditionallySpeculatable) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemoryEffectOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ResourceBlobManagerDialectInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmDialectInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::BytecodeDialectInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineBinaryOpExprStorage) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineConstantExprStorage) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineDimExprStorage) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::AffineMapStorage) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::detail::IntegerSetStorage) | |
Load new dialect in Context builtin | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Stream::AffinityTypeInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::HoistableTypeInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroOperands<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneRegion<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroResults<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroSuccessors<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NoRegionArguments<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NoTerminator<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlock<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OpInvariants<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::BytecodeOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AffineScope<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsIsolatedFromAbove<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SymbolTable<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpAsmOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionKindInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasOnlyGraphRegion<Empty>) | |
Load new dialect in Context func | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SymbolUserOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallableOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::FunctionOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchTerminatorOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DialectInlinerInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConvertToLLVMPatternInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::bufferization::BufferizableOpInterface) | |
Load new dialect in Context cf | |
Load new dialect in Context arith | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithFastMathInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorUnrollOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferTypeOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferIntRangeInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithIntegerOverflowFlagsInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CastOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithRoundingModeInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SelectLikeOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::bufferization::BufferDeallocationOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ValueBoundsOpInterface) | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Stream::AffinityOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::NumericCastOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::InferIntDivisibilityOpInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::BranchOpInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AutomaticAllocationScope<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CallableOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::FunctionOpInterface::Trait<Empty>) | |
Load new dialect in Context iree_codegen | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DeviceMappingAttrInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Codegen::LoweringConfigAttrInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ViewLikeOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Codegen::UKernelOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestinationStyleOpInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ZeroRegions<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneResult<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult<mlir::Type>::Impl<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ConstantLike<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ConditionallySpeculatable::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AlwaysSpeculatableImplTrait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::MemoryEffectOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferIntRangeInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::InferTypeOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::detail::ConstantOpGenericAdaptorBase::Properties) | |
Load new dialect in Context hal | |
Load new dialect in Context scf | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LoopLikeOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ParallelCombiningOpInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Load new dialect in Context util | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SizedStorageAttr) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SerializableAttrInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::InliningPolicyAttrInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::ReferenceTypeInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SizeAwareTypeInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalTypeInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::InferTypeSizeInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SubrangeTypeInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SizeAwareOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SubrangeOperandOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::SubrangeOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::TiedOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalAddressOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalLoadIndirectOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalLoadOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalStoreIndirectOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::GlobalStoreOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::InitializerOpInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::DeviceInitializationAttrInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Stream::AffinityAttr) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::HoistableAttrInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::AllocatorType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::BufferType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::BufferViewType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::ChannelType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::CommandBufferType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::DeviceType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::EventType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::ExecutableType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::FenceType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::FileType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::HAL::SemaphoreType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::ShapeAwareOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::VMConversionDialectInterface) | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::ProcessorIDInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::ProcessorCountInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::ProcessorTileSizeInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::VariadicOperands<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AttrSizedOperandSegments<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::ShapeAwareOpInterface::Trait<Empty>) | |
Load new dialect in Context flow | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Flow::DispatchTensorType) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ReifyRankedShapedTypeOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OffsetSizeAndStrideOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::ClosureOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::HoistableOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DialectFoldInterface) | |
Load new dialect in Context tensor | |
Load new dialect in Context affine | |
Load new dialect in Context ub | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ub::PoisonAttrInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::AffineDmaStartOp) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::AffineMapAccessInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::AffineDmaWaitOp) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::AffineReadOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::AffineWriteOpInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Load new dialect in Context complex | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ShapedDimOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::transform::FindPayloadReplacementOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubsetOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubsetInsertionOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::SubsetExtractionOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::TilingInterface) | |
Ignoring repeated interface registration | |
Load new dialect in Context linalg | |
Load new dialect in Context math | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Load new dialect in Context memref | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::CopyOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::PromotableMemOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestructurableAccessorOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::PromotableAllocationOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestructurableAllocationOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::bufferization::AllocationOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RuntimeVerifiableOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestructurableTypeInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::AggregatedOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::LinalgOp) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ContractionOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::ConvolutionOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::FillOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::mesh::ShardingInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::PartialReductionOpInterface) | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::PartitionableLoopsInterface) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult<mlir::RankedTensorType>::Impl<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AtLeastNOperands<1>::Impl<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OffsetSizeAndStrideOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ReifyRankedShapedTypeOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Util::TiedOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Flow::detail::DispatchTensorLoadOpGenericAdaptorBase::Properties) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::AtLeastNOperands<2>::Impl<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::DestinationStyleOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::tensor::detail::UnPackOpGenericAdaptorBase::Properties) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::VariadicResults<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlockImplicitTerminator<mlir::linalg::YieldOp>::Impl<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::linalg::LinalgOp::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::NOperands<2>::Impl<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithFastMathInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsCommutative<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SameOperandsAndResultType<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::VectorUnrollOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Elementwise<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Scalarizable<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Vectorizable<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::Tensorizable<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchTerminatorOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::ReturnLike<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::IsTerminator<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::Flow::detail::DispatchTensorStoreOpGenericAdaptorBase::Properties) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent<mlir::func::FuncOp>::Impl<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::MemRefsNormalizable<Empty>) | |
Load new dialect in Context iree_linalg_ext | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::LinalgExt::LinalgFusionOpInterface) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::iree_compiler::IREE::LinalgExt::LinalgExtOp) | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
Ignoring repeated interface registration | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::affine::detail::AffineApplyOpGenericAdaptorBase::Properties) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::OneTypedResult<mlir::IndexType>::Impl<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParent<mlir::scf::ForallOp>::Impl<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::ParallelCombiningOpInterface::Trait<Empty>) | |
makeTiledShapes: for loop#0 | |
computeTileOffsets: 0 : index | |
makeTiledShapes: for loop#1 | |
computeTileOffsets: 0 : index | |
makeTiledShapes: for loop#2 | |
computeTileOffsets: <block argument> of type 'index' at index: 0 | |
computeTileSizes: 0 : index | |
computeTileSizes: 0 : index | |
computeTileSizes: 1151 : index | |
makeTiledShapes: for operand ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::SingleBlockImplicitTerminator<mlir::scf::InParallelOp>::Impl<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::LoopLikeOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasRecursiveMemoryEffects<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::RegionBranchOpInterface::Trait<Empty>) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::OpTrait::HasParallelRegion<Empty>) | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32>: tiled: figure out subshape... | |
computeSliceParameters: for dim#0: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d0) | |
computeSliceParameters: raw size: 1 : index | |
computeSliceParameters: new offset: 0 : index | |
makeTiledShape: new size: 1 : index | |
computeSliceParameters: for dim#1: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d1) | |
computeSliceParameters: raw size: 1 : index | |
computeSliceParameters: new offset: 0 : index | |
makeTiledShape: new size: 1 : index | |
computeSliceParameters: for dim#2: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d2) | |
computeSliceParameters: raw size: 1152 : index | |
computeSliceParameters: new offset: <block argument> of type 'index' at index: 0 | |
makeTiledShape: new size: 1152 : index | |
makeTiledShapes: for operand %unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32>: tiled: figure out subshape... | |
computeSliceParameters: for dim#0: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d0) | |
computeSliceParameters: raw size: 1 : index | |
computeSliceParameters: new offset: 0 : index | |
makeTiledShape: new size: 1 : index | |
computeSliceParameters: for dim#1: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d1) | |
computeSliceParameters: raw size: 1 : index | |
computeSliceParameters: new offset: 0 : index | |
makeTiledShape: new size: 1 : index | |
computeSliceParameters: for dim#2: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d2) | |
computeSliceParameters: raw size: 1152 : index | |
computeSliceParameters: new offset: <block argument> of type 'index' at index: 0 | |
makeTiledShape: new size: 1152 : index | |
makeTiledShapes: for operand <block argument> of type 'tensor<1x1x1152xf32>' at index: 1: tiled: figure out subshape... | |
computeSliceParameters: for dim#0: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d0) | |
computeSliceParameters: raw size: 1 : index | |
computeSliceParameters: new offset: 0 : index | |
makeTiledShape: new size: 1 : index | |
computeSliceParameters: for dim#1: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d1) | |
computeSliceParameters: raw size: 1 : index | |
computeSliceParameters: new offset: 0 : index | |
makeTiledShape: new size: 1 : index | |
computeSliceParameters: for dim#2: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d2) | |
computeSliceParameters: raw size: 1152 : index | |
computeSliceParameters: new offset: <block argument> of type 'index' at index: 0 | |
makeTiledShape: new size: 1152 : index | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::tensor::detail::ExtractSliceOpGenericAdaptorBase::Properties) | |
computeSliceParameters: for dim#0: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d0) | |
computeSliceParameters: raw size: 1 : index | |
computeSliceParameters: new offset: 0 : index | |
makeTiledShape: new size: 1 : index | |
computeSliceParameters: for dim#1: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d1) | |
computeSliceParameters: raw size: 1 : index | |
computeSliceParameters: new offset: 0 : index | |
makeTiledShape: new size: 1 : index | |
computeSliceParameters: for dim#2: tiled: figure out subsize... | |
computeSliceParameters: submap: (d0, d1, d2) -> (d2) | |
computeSliceParameters: raw size: 1152 : index | |
computeSliceParameters: new offset: <block argument> of type 'index' at index: 0 | |
makeTiledShape: new size: 1152 : index | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::tensor::detail::ParallelInsertSliceOpGenericAdaptorBase::Properties) | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%extracted_slice = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_1 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%8 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice, %extracted_slice_0 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_1 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_2: f32, %out: f32): | |
%9 = arith.addf %in, %in_2 : f32 | |
linalg.yield %9 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %8 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_1 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_2 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%9 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_1 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_2 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_3: f32, %out: f32): | |
%10 = arith.addf %in, %in_3 : f32 | |
linalg.yield %10 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %9 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::detail::MulIOpGenericAdaptorBase::Properties) | |
ImplicitTypeIDRegistry::lookupOrInsert(mlir::arith::ArithIntegerOverflowFlagsInterface::Trait<Empty>) | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_8 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_9 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_10 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%15 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_8 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_10 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_11: f32, %out: f32): | |
%16 = arith.addf %in, %in_11 : f32 | |
linalg.yield %16 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %15 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_8 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_9 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%15 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_10 = tensor.extract_slice %15[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_11 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%16 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_8 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_11 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_12: f32, %out: f32): | |
%17 = arith.addf %in, %in_12 : f32 | |
linalg.yield %17 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %16 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_16 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_17 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_18 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%20 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_19 = tensor.extract_slice %20[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_20 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%21 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_16 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_20 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_21: f32, %out: f32): | |
%22 = arith.addf %in, %in_21 : f32 | |
linalg.yield %22 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %21 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_24 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_25 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_26 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_27 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%25 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_28 = tensor.extract_slice %25[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_29 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%26 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_24 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_29 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_30: f32, %out: f32): | |
%27 = arith.addf %in, %in_30 : f32 | |
linalg.yield %27 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %26 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_32 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_33 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_34 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_35 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_36 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%30 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_37 = tensor.extract_slice %30[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_38 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%31 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_32 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_38 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_39: f32, %out: f32): | |
%32 = arith.addf %in, %in_39 : f32 | |
linalg.yield %32 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %31 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_40 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_41 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_42 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_43 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_44 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_45 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%35 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_46 = tensor.extract_slice %35[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_47 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%36 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_40 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_47 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_48: f32, %out: f32): | |
%37 = arith.addf %in, %in_48 : f32 | |
linalg.yield %37 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %36 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_48 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_49 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_50 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_51 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_52 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_53 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_54 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%40 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_55 = tensor.extract_slice %40[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_56 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%41 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_48 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_56 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_57: f32, %out: f32): | |
%42 = arith.addf %in, %in_57 : f32 | |
linalg.yield %42 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %41 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_56 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_57 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_58 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_59 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_60 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_61 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_62 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_63 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%45 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_64 = tensor.extract_slice %45[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_65 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%46 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_56 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_65 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_66: f32, %out: f32): | |
%47 = arith.addf %in, %in_66 : f32 | |
linalg.yield %47 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %46 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_64 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_65 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_66 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_67 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_68 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_69 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_70 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_71 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_72 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%50 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_73 = tensor.extract_slice %50[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_74 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%51 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_64 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_74 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_75: f32, %out: f32): | |
%52 = arith.addf %in, %in_75 : f32 | |
linalg.yield %52 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %51 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_72 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_73 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_74 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_75 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_76 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_77 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_78 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_79 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_80 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_81 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%55 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_82 = tensor.extract_slice %55[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_83 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%56 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_72 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_83 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_84: f32, %out: f32): | |
%57 = arith.addf %in, %in_84 : f32 | |
linalg.yield %57 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %56 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_80 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_81 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_82 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_83 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_84 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_85 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_86 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_87 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_88 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_89 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_90 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%60 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_91 = tensor.extract_slice %60[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_92 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%61 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_80 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_92 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_93: f32, %out: f32): | |
%62 = arith.addf %in, %in_93 : f32 | |
linalg.yield %62 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %61 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_88 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_89 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_90 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_91 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_92 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_93 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_94 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_95 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_96 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_97 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_98 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_99 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%65 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_100 = tensor.extract_slice %65[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_101 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%66 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_88 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_101 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_102: f32, %out: f32): | |
%67 = arith.addf %in, %in_102 : f32 | |
linalg.yield %67 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %66 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_96 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_97 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_98 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_99 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_100 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_101 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_102 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_103 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_104 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_105 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_106 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_107 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_108 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%70 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_109 = tensor.extract_slice %70[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_110 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%71 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_96 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_110 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_111: f32, %out: f32): | |
%72 = arith.addf %in, %in_111 : f32 | |
linalg.yield %72 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %71 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_104 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_105 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_106 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_107 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_108 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_109 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_110 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_111 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_112 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_113 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_114 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_115 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_116 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_117 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%75 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_118 = tensor.extract_slice %75[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_119 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%76 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_104 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_119 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_120: f32, %out: f32): | |
%77 = arith.addf %in, %in_120 : f32 | |
linalg.yield %77 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %76 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_112 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_113 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_114 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_115 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_116 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_117 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_118 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_119 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_120 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_121 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_122 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_123 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_124 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_125 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_126 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%80 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_127 = tensor.extract_slice %80[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_128 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%81 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_112 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_128 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_129: f32, %out: f32): | |
%82 = arith.addf %in, %in_129 : f32 | |
linalg.yield %82 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %81 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_120 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_121 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_122 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_123 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_124 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_125 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_126 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_127 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_128 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_129 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_130 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_131 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_132 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_133 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_134 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_135 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%85 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_136 = tensor.extract_slice %85[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_137 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%86 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_120 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_137 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_138: f32, %out: f32): | |
%87 = arith.addf %in, %in_138 : f32 | |
linalg.yield %87 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %86 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_128 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_129 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_130 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_131 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_132 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_133 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_134 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_135 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_136 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_137 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_138 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_139 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_140 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_141 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_142 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_143 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_144 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%90 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_145 = tensor.extract_slice %90[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_146 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%91 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_128 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_146 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_147: f32, %out: f32): | |
%92 = arith.addf %in, %in_147 : f32 | |
linalg.yield %92 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %91 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_136 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_137 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_138 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_139 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_140 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_141 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_142 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_143 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_144 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_145 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_146 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_147 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_148 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_149 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_150 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_151 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_152 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_153 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%95 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_154 = tensor.extract_slice %95[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_155 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%96 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_136 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_155 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_156: f32, %out: f32): | |
%97 = arith.addf %in, %in_156 : f32 | |
linalg.yield %97 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %96 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_144 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_145 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_146 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_147 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_148 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_149 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_150 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_151 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_152 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_153 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_154 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_155 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_156 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_157 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_158 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_159 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_160 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_161 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_162 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%100 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_163 = tensor.extract_slice %100[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_164 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%101 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_144 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_164 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_165: f32, %out: f32): | |
%102 = arith.addf %in, %in_165 : f32 | |
linalg.yield %102 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %101 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_152 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_153 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_154 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_155 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_156 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_157 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_158 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_159 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_160 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_161 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_162 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_163 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_164 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_165 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_166 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_167 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_168 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_169 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_170 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_171 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%105 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_172 = tensor.extract_slice %105[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_173 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%106 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_152 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_173 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_174: f32, %out: f32): | |
%107 = arith.addf %in, %in_174 : f32 | |
linalg.yield %107 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %106 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_160 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_161 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_162 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_163 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_164 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_165 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_166 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_167 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_168 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_169 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_170 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_171 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_172 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_173 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_174 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_175 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_176 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_177 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_178 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_179 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_180 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%110 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_181 = tensor.extract_slice %110[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_182 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%111 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_160 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_182 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_183: f32, %out: f32): | |
%112 = arith.addf %in, %in_183 : f32 | |
linalg.yield %112 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %111 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_168 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_169 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_170 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_171 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_172 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_173 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_174 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_175 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_176 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_177 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_178 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_179 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_180 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_181 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_182 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_183 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_184 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_185 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_186 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_187 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_188 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_189 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%115 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_190 = tensor.extract_slice %115[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_191 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%116 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_168 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_191 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_192: f32, %out: f32): | |
%117 = arith.addf %in, %in_192 : f32 | |
linalg.yield %117 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %116 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_176 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_177 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_178 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_179 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_180 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_181 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_182 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_183 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_184 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_185 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_186 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_187 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_188 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_189 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_190 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_191 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_192 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_193 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_194 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_195 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_196 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_197 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_198 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%120 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_199 = tensor.extract_slice %120[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_200 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%121 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_176 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_200 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_201: f32, %out: f32): | |
%122 = arith.addf %in, %in_201 : f32 | |
linalg.yield %122 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %121 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_184 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_185 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_186 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_187 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_188 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_189 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_190 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_191 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_192 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_193 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_194 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_195 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_196 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_197 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_198 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_199 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_200 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_201 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_202 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_203 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_204 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_205 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_206 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_207 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%125 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_208 = tensor.extract_slice %125[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_209 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%126 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_184 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_209 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_210: f32, %out: f32): | |
%127 = arith.addf %in, %in_210 : f32 | |
linalg.yield %127 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %126 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_192 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_193 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_194 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_195 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_196 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_197 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_198 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_199 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_200 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_201 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_202 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_203 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_204 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_205 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_206 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_207 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_208 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_209 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_210 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_211 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_212 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_213 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_214 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_215 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_216 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%130 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_217 = tensor.extract_slice %130[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_218 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%131 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_192 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_218 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_219: f32, %out: f32): | |
%132 = arith.addf %in, %in_219 : f32 | |
linalg.yield %132 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %131 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_200 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_201 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_202 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_203 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_204 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_205 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_206 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_207 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_208 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_209 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_210 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_211 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_212 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_213 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_214 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_215 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_216 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_217 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_218 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_219 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_220 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_221 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_222 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_223 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_224 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_225 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%135 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_226 = tensor.extract_slice %135[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_227 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%136 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_200 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_227 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_228: f32, %out: f32): | |
%137 = arith.addf %in, %in_228 : f32 | |
linalg.yield %137 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %136 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_208 = tensor.extract_slice %unpack_207[0, %136, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_209 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_210 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_211 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_212 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_213 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_214 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_215 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_216 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_217 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_218 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_219 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_220 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_221 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_222 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_223 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_224 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_225 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_226 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_227 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_228 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_229 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_230 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_231 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_232 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_233 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_234 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%140 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_235 = tensor.extract_slice %140[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_236 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%141 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_208 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_236 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_237: f32, %out: f32): | |
%142 = arith.addf %in, %in_237 : f32 | |
linalg.yield %142 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %141 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_208 = arith.constant 8 : index | |
%140 = affine.apply affine_map<() -> (0)>() | |
%141 = affine.apply affine_map<() -> (0)>() | |
%c0_209 = arith.constant 0 : index | |
%c8_210 = arith.constant 8 : index | |
%142 = affine.apply affine_map<() -> (0)>() | |
%143 = affine.apply affine_map<() -> (0)>() | |
%c1_211 = arith.constant 1 : index | |
%c8_212 = arith.constant 8 : index | |
%c8_213 = arith.constant 8 : index | |
%extracted_slice_214 = tensor.extract_slice %extracted_slice_206[0, %140, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%144 = tensor.empty(%c8_213) : tensor<1x?x1152xf32> | |
%unpack_215 = tensor.unpack %extracted_slice_214 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %144 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_216 = tensor.extract_slice %unpack_215[0, %141, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_217 = tensor.extract_slice %unpack_207[0, %136, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_218 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_219 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_220 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_221 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_222 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_223 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_224 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_225 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_226 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_227 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_228 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_229 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_230 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_231 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_232 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_233 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_234 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_235 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_236 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_237 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_238 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_239 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_240 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_241 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_242 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_243 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%145 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_244 = tensor.extract_slice %145[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_245 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%146 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_216 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_245 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_246: f32, %out: f32): | |
%147 = arith.addf %in, %in_246 : f32 | |
linalg.yield %147 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %146 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_208 = arith.constant 8 : index | |
%140 = affine.apply affine_map<() -> (0)>() | |
%141 = affine.apply affine_map<() -> (0)>() | |
%c0_209 = arith.constant 0 : index | |
%c8_210 = arith.constant 8 : index | |
%142 = affine.apply affine_map<() -> (0)>() | |
%143 = affine.apply affine_map<() -> (0)>() | |
%c1_211 = arith.constant 1 : index | |
%c8_212 = arith.constant 8 : index | |
%c8_213 = arith.constant 8 : index | |
%extracted_slice_214 = tensor.extract_slice %extracted_slice_206[0, %140, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%144 = tensor.empty(%c8_213) : tensor<1x?x1152xf32> | |
%unpack_215 = tensor.unpack %extracted_slice_214 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %144 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_216 = arith.constant 8 : index | |
%145 = affine.apply affine_map<() -> (0)>() | |
%146 = affine.apply affine_map<() -> (0)>() | |
%c0_217 = arith.constant 0 : index | |
%c8_218 = arith.constant 8 : index | |
%147 = affine.apply affine_map<() -> (0)>() | |
%148 = affine.apply affine_map<() -> (0)>() | |
%c1_219 = arith.constant 1 : index | |
%c8_220 = arith.constant 8 : index | |
%c8_221 = arith.constant 8 : index | |
%extracted_slice_222 = tensor.extract_slice %extracted_slice_214[0, %145, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%149 = tensor.empty(%c8_221) : tensor<1x?x1152xf32> | |
%unpack_223 = tensor.unpack %extracted_slice_222 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %149 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_224 = tensor.extract_slice %unpack_223[0, %146, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_225 = tensor.extract_slice %unpack_215[0, %141, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_226 = tensor.extract_slice %unpack_207[0, %136, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_227 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_228 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_229 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_230 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_231 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_232 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_233 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_234 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_235 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_236 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_237 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_238 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_239 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_240 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_241 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_242 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_243 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_244 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_245 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_246 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_247 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_248 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_249 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_250 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_251 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_252 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%150 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_253 = tensor.extract_slice %150[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_254 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%151 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_224 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_254 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_255: f32, %out: f32): | |
%152 = arith.addf %in, %in_255 : f32 | |
linalg.yield %152 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %151 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_208 = arith.constant 8 : index | |
%140 = affine.apply affine_map<() -> (0)>() | |
%141 = affine.apply affine_map<() -> (0)>() | |
%c0_209 = arith.constant 0 : index | |
%c8_210 = arith.constant 8 : index | |
%142 = affine.apply affine_map<() -> (0)>() | |
%143 = affine.apply affine_map<() -> (0)>() | |
%c1_211 = arith.constant 1 : index | |
%c8_212 = arith.constant 8 : index | |
%c8_213 = arith.constant 8 : index | |
%extracted_slice_214 = tensor.extract_slice %extracted_slice_206[0, %140, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%144 = tensor.empty(%c8_213) : tensor<1x?x1152xf32> | |
%unpack_215 = tensor.unpack %extracted_slice_214 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %144 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_216 = arith.constant 8 : index | |
%145 = affine.apply affine_map<() -> (0)>() | |
%146 = affine.apply affine_map<() -> (0)>() | |
%c0_217 = arith.constant 0 : index | |
%c8_218 = arith.constant 8 : index | |
%147 = affine.apply affine_map<() -> (0)>() | |
%148 = affine.apply affine_map<() -> (0)>() | |
%c1_219 = arith.constant 1 : index | |
%c8_220 = arith.constant 8 : index | |
%c8_221 = arith.constant 8 : index | |
%extracted_slice_222 = tensor.extract_slice %extracted_slice_214[0, %145, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%149 = tensor.empty(%c8_221) : tensor<1x?x1152xf32> | |
%unpack_223 = tensor.unpack %extracted_slice_222 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %149 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_224 = arith.constant 8 : index | |
%150 = affine.apply affine_map<() -> (0)>() | |
%151 = affine.apply affine_map<() -> (0)>() | |
%c0_225 = arith.constant 0 : index | |
%c8_226 = arith.constant 8 : index | |
%152 = affine.apply affine_map<() -> (0)>() | |
%153 = affine.apply affine_map<() -> (0)>() | |
%c1_227 = arith.constant 1 : index | |
%c8_228 = arith.constant 8 : index | |
%c8_229 = arith.constant 8 : index | |
%extracted_slice_230 = tensor.extract_slice %extracted_slice_222[0, %150, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%154 = tensor.empty(%c8_229) : tensor<1x?x1152xf32> | |
%unpack_231 = tensor.unpack %extracted_slice_230 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %154 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_232 = tensor.extract_slice %unpack_231[0, %151, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_233 = tensor.extract_slice %unpack_223[0, %146, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_234 = tensor.extract_slice %unpack_215[0, %141, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_235 = tensor.extract_slice %unpack_207[0, %136, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_236 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_237 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_238 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_239 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_240 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_241 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_242 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_243 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_244 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_245 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_246 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_247 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_248 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_249 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_250 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_251 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_252 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_253 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_254 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_255 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_256 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_257 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_258 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_259 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_260 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_261 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%155 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_262 = tensor.extract_slice %155[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_263 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%156 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_232 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_263 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_264: f32, %out: f32): | |
%157 = arith.addf %in, %in_264 : f32 | |
linalg.yield %157 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %156 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_208 = arith.constant 8 : index | |
%140 = affine.apply affine_map<() -> (0)>() | |
%141 = affine.apply affine_map<() -> (0)>() | |
%c0_209 = arith.constant 0 : index | |
%c8_210 = arith.constant 8 : index | |
%142 = affine.apply affine_map<() -> (0)>() | |
%143 = affine.apply affine_map<() -> (0)>() | |
%c1_211 = arith.constant 1 : index | |
%c8_212 = arith.constant 8 : index | |
%c8_213 = arith.constant 8 : index | |
%extracted_slice_214 = tensor.extract_slice %extracted_slice_206[0, %140, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%144 = tensor.empty(%c8_213) : tensor<1x?x1152xf32> | |
%unpack_215 = tensor.unpack %extracted_slice_214 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %144 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_216 = arith.constant 8 : index | |
%145 = affine.apply affine_map<() -> (0)>() | |
%146 = affine.apply affine_map<() -> (0)>() | |
%c0_217 = arith.constant 0 : index | |
%c8_218 = arith.constant 8 : index | |
%147 = affine.apply affine_map<() -> (0)>() | |
%148 = affine.apply affine_map<() -> (0)>() | |
%c1_219 = arith.constant 1 : index | |
%c8_220 = arith.constant 8 : index | |
%c8_221 = arith.constant 8 : index | |
%extracted_slice_222 = tensor.extract_slice %extracted_slice_214[0, %145, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%149 = tensor.empty(%c8_221) : tensor<1x?x1152xf32> | |
%unpack_223 = tensor.unpack %extracted_slice_222 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %149 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_224 = arith.constant 8 : index | |
%150 = affine.apply affine_map<() -> (0)>() | |
%151 = affine.apply affine_map<() -> (0)>() | |
%c0_225 = arith.constant 0 : index | |
%c8_226 = arith.constant 8 : index | |
%152 = affine.apply affine_map<() -> (0)>() | |
%153 = affine.apply affine_map<() -> (0)>() | |
%c1_227 = arith.constant 1 : index | |
%c8_228 = arith.constant 8 : index | |
%c8_229 = arith.constant 8 : index | |
%extracted_slice_230 = tensor.extract_slice %extracted_slice_222[0, %150, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%154 = tensor.empty(%c8_229) : tensor<1x?x1152xf32> | |
%unpack_231 = tensor.unpack %extracted_slice_230 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %154 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_232 = arith.constant 8 : index | |
%155 = affine.apply affine_map<() -> (0)>() | |
%156 = affine.apply affine_map<() -> (0)>() | |
%c0_233 = arith.constant 0 : index | |
%c8_234 = arith.constant 8 : index | |
%157 = affine.apply affine_map<() -> (0)>() | |
%158 = affine.apply affine_map<() -> (0)>() | |
%c1_235 = arith.constant 1 : index | |
%c8_236 = arith.constant 8 : index | |
%c8_237 = arith.constant 8 : index | |
%extracted_slice_238 = tensor.extract_slice %extracted_slice_230[0, %155, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%159 = tensor.empty(%c8_237) : tensor<1x?x1152xf32> | |
%unpack_239 = tensor.unpack %extracted_slice_238 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %159 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_240 = tensor.extract_slice %unpack_239[0, %156, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_241 = tensor.extract_slice %unpack_231[0, %151, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_242 = tensor.extract_slice %unpack_223[0, %146, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_243 = tensor.extract_slice %unpack_215[0, %141, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_244 = tensor.extract_slice %unpack_207[0, %136, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_245 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_246 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_247 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_248 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_249 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_250 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_251 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_252 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_253 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_254 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_255 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_256 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_257 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_258 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_259 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_260 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_261 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_262 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_263 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_264 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_265 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_266 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_267 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_268 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_269 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_270 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%160 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_271 = tensor.extract_slice %160[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_272 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%161 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_240 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_272 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_273: f32, %out: f32): | |
%162 = arith.addf %in, %in_273 : f32 | |
linalg.yield %162 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %161 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_208 = arith.constant 8 : index | |
%140 = affine.apply affine_map<() -> (0)>() | |
%141 = affine.apply affine_map<() -> (0)>() | |
%c0_209 = arith.constant 0 : index | |
%c8_210 = arith.constant 8 : index | |
%142 = affine.apply affine_map<() -> (0)>() | |
%143 = affine.apply affine_map<() -> (0)>() | |
%c1_211 = arith.constant 1 : index | |
%c8_212 = arith.constant 8 : index | |
%c8_213 = arith.constant 8 : index | |
%extracted_slice_214 = tensor.extract_slice %extracted_slice_206[0, %140, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%144 = tensor.empty(%c8_213) : tensor<1x?x1152xf32> | |
%unpack_215 = tensor.unpack %extracted_slice_214 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %144 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_216 = arith.constant 8 : index | |
%145 = affine.apply affine_map<() -> (0)>() | |
%146 = affine.apply affine_map<() -> (0)>() | |
%c0_217 = arith.constant 0 : index | |
%c8_218 = arith.constant 8 : index | |
%147 = affine.apply affine_map<() -> (0)>() | |
%148 = affine.apply affine_map<() -> (0)>() | |
%c1_219 = arith.constant 1 : index | |
%c8_220 = arith.constant 8 : index | |
%c8_221 = arith.constant 8 : index | |
%extracted_slice_222 = tensor.extract_slice %extracted_slice_214[0, %145, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%149 = tensor.empty(%c8_221) : tensor<1x?x1152xf32> | |
%unpack_223 = tensor.unpack %extracted_slice_222 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %149 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_224 = arith.constant 8 : index | |
%150 = affine.apply affine_map<() -> (0)>() | |
%151 = affine.apply affine_map<() -> (0)>() | |
%c0_225 = arith.constant 0 : index | |
%c8_226 = arith.constant 8 : index | |
%152 = affine.apply affine_map<() -> (0)>() | |
%153 = affine.apply affine_map<() -> (0)>() | |
%c1_227 = arith.constant 1 : index | |
%c8_228 = arith.constant 8 : index | |
%c8_229 = arith.constant 8 : index | |
%extracted_slice_230 = tensor.extract_slice %extracted_slice_222[0, %150, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%154 = tensor.empty(%c8_229) : tensor<1x?x1152xf32> | |
%unpack_231 = tensor.unpack %extracted_slice_230 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %154 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_232 = arith.constant 8 : index | |
%155 = affine.apply affine_map<() -> (0)>() | |
%156 = affine.apply affine_map<() -> (0)>() | |
%c0_233 = arith.constant 0 : index | |
%c8_234 = arith.constant 8 : index | |
%157 = affine.apply affine_map<() -> (0)>() | |
%158 = affine.apply affine_map<() -> (0)>() | |
%c1_235 = arith.constant 1 : index | |
%c8_236 = arith.constant 8 : index | |
%c8_237 = arith.constant 8 : index | |
%extracted_slice_238 = tensor.extract_slice %extracted_slice_230[0, %155, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%159 = tensor.empty(%c8_237) : tensor<1x?x1152xf32> | |
%unpack_239 = tensor.unpack %extracted_slice_238 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %159 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_240 = arith.constant 8 : index | |
%160 = affine.apply affine_map<() -> (0)>() | |
%161 = affine.apply affine_map<() -> (0)>() | |
%c0_241 = arith.constant 0 : index | |
%c8_242 = arith.constant 8 : index | |
%162 = affine.apply affine_map<() -> (0)>() | |
%163 = affine.apply affine_map<() -> (0)>() | |
%c1_243 = arith.constant 1 : index | |
%c8_244 = arith.constant 8 : index | |
%c8_245 = arith.constant 8 : index | |
%extracted_slice_246 = tensor.extract_slice %extracted_slice_238[0, %160, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%164 = tensor.empty(%c8_245) : tensor<1x?x1152xf32> | |
%unpack_247 = tensor.unpack %extracted_slice_246 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %164 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_248 = tensor.extract_slice %unpack_247[0, %161, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_249 = tensor.extract_slice %unpack_239[0, %156, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_250 = tensor.extract_slice %unpack_231[0, %151, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_251 = tensor.extract_slice %unpack_223[0, %146, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_252 = tensor.extract_slice %unpack_215[0, %141, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_253 = tensor.extract_slice %unpack_207[0, %136, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_254 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_255 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_256 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_257 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_258 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_259 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_260 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_261 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_262 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_263 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_264 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_265 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_266 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_267 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_268 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_269 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_270 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_271 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_272 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_273 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_274 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_275 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_276 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_277 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_278 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_279 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%165 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_280 = tensor.extract_slice %165[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_281 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%166 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_248 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_281 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_282: f32, %out: f32): | |
%167 = arith.addf %in, %in_282 : f32 | |
linalg.yield %167 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %166 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_208 = arith.constant 8 : index | |
%140 = affine.apply affine_map<() -> (0)>() | |
%141 = affine.apply affine_map<() -> (0)>() | |
%c0_209 = arith.constant 0 : index | |
%c8_210 = arith.constant 8 : index | |
%142 = affine.apply affine_map<() -> (0)>() | |
%143 = affine.apply affine_map<() -> (0)>() | |
%c1_211 = arith.constant 1 : index | |
%c8_212 = arith.constant 8 : index | |
%c8_213 = arith.constant 8 : index | |
%extracted_slice_214 = tensor.extract_slice %extracted_slice_206[0, %140, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%144 = tensor.empty(%c8_213) : tensor<1x?x1152xf32> | |
%unpack_215 = tensor.unpack %extracted_slice_214 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %144 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_216 = arith.constant 8 : index | |
%145 = affine.apply affine_map<() -> (0)>() | |
%146 = affine.apply affine_map<() -> (0)>() | |
%c0_217 = arith.constant 0 : index | |
%c8_218 = arith.constant 8 : index | |
%147 = affine.apply affine_map<() -> (0)>() | |
%148 = affine.apply affine_map<() -> (0)>() | |
%c1_219 = arith.constant 1 : index | |
%c8_220 = arith.constant 8 : index | |
%c8_221 = arith.constant 8 : index | |
%extracted_slice_222 = tensor.extract_slice %extracted_slice_214[0, %145, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%149 = tensor.empty(%c8_221) : tensor<1x?x1152xf32> | |
%unpack_223 = tensor.unpack %extracted_slice_222 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %149 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_224 = arith.constant 8 : index | |
%150 = affine.apply affine_map<() -> (0)>() | |
%151 = affine.apply affine_map<() -> (0)>() | |
%c0_225 = arith.constant 0 : index | |
%c8_226 = arith.constant 8 : index | |
%152 = affine.apply affine_map<() -> (0)>() | |
%153 = affine.apply affine_map<() -> (0)>() | |
%c1_227 = arith.constant 1 : index | |
%c8_228 = arith.constant 8 : index | |
%c8_229 = arith.constant 8 : index | |
%extracted_slice_230 = tensor.extract_slice %extracted_slice_222[0, %150, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%154 = tensor.empty(%c8_229) : tensor<1x?x1152xf32> | |
%unpack_231 = tensor.unpack %extracted_slice_230 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %154 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_232 = arith.constant 8 : index | |
%155 = affine.apply affine_map<() -> (0)>() | |
%156 = affine.apply affine_map<() -> (0)>() | |
%c0_233 = arith.constant 0 : index | |
%c8_234 = arith.constant 8 : index | |
%157 = affine.apply affine_map<() -> (0)>() | |
%158 = affine.apply affine_map<() -> (0)>() | |
%c1_235 = arith.constant 1 : index | |
%c8_236 = arith.constant 8 : index | |
%c8_237 = arith.constant 8 : index | |
%extracted_slice_238 = tensor.extract_slice %extracted_slice_230[0, %155, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%159 = tensor.empty(%c8_237) : tensor<1x?x1152xf32> | |
%unpack_239 = tensor.unpack %extracted_slice_238 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %159 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_240 = arith.constant 8 : index | |
%160 = affine.apply affine_map<() -> (0)>() | |
%161 = affine.apply affine_map<() -> (0)>() | |
%c0_241 = arith.constant 0 : index | |
%c8_242 = arith.constant 8 : index | |
%162 = affine.apply affine_map<() -> (0)>() | |
%163 = affine.apply affine_map<() -> (0)>() | |
%c1_243 = arith.constant 1 : index | |
%c8_244 = arith.constant 8 : index | |
%c8_245 = arith.constant 8 : index | |
%extracted_slice_246 = tensor.extract_slice %extracted_slice_238[0, %160, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%164 = tensor.empty(%c8_245) : tensor<1x?x1152xf32> | |
%unpack_247 = tensor.unpack %extracted_slice_246 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %164 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_248 = arith.constant 8 : index | |
%165 = affine.apply affine_map<() -> (0)>() | |
%166 = affine.apply affine_map<() -> (0)>() | |
%c0_249 = arith.constant 0 : index | |
%c8_250 = arith.constant 8 : index | |
%167 = affine.apply affine_map<() -> (0)>() | |
%168 = affine.apply affine_map<() -> (0)>() | |
%c1_251 = arith.constant 1 : index | |
%c8_252 = arith.constant 8 : index | |
%c8_253 = arith.constant 8 : index | |
%extracted_slice_254 = tensor.extract_slice %extracted_slice_246[0, %165, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%169 = tensor.empty(%c8_253) : tensor<1x?x1152xf32> | |
%unpack_255 = tensor.unpack %extracted_slice_254 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %169 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_256 = tensor.extract_slice %unpack_255[0, %166, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_257 = tensor.extract_slice %unpack_247[0, %161, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_258 = tensor.extract_slice %unpack_239[0, %156, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_259 = tensor.extract_slice %unpack_231[0, %151, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_260 = tensor.extract_slice %unpack_223[0, %146, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_261 = tensor.extract_slice %unpack_215[0, %141, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_262 = tensor.extract_slice %unpack_207[0, %136, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_263 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_264 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_265 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_266 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_267 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_268 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_269 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_270 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_271 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_272 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_273 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_274 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_275 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_276 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_277 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_278 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_279 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_280 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_281 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_282 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_283 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_284 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_285 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_286 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_287 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_288 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%170 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_289 = tensor.extract_slice %170[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_290 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%171 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_256 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_290 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_291: f32, %out: f32): | |
%172 = arith.addf %in, %in_291 : f32 | |
linalg.yield %172 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %171 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_208 = arith.constant 8 : index | |
%140 = affine.apply affine_map<() -> (0)>() | |
%141 = affine.apply affine_map<() -> (0)>() | |
%c0_209 = arith.constant 0 : index | |
%c8_210 = arith.constant 8 : index | |
%142 = affine.apply affine_map<() -> (0)>() | |
%143 = affine.apply affine_map<() -> (0)>() | |
%c1_211 = arith.constant 1 : index | |
%c8_212 = arith.constant 8 : index | |
%c8_213 = arith.constant 8 : index | |
%extracted_slice_214 = tensor.extract_slice %extracted_slice_206[0, %140, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%144 = tensor.empty(%c8_213) : tensor<1x?x1152xf32> | |
%unpack_215 = tensor.unpack %extracted_slice_214 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %144 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_216 = arith.constant 8 : index | |
%145 = affine.apply affine_map<() -> (0)>() | |
%146 = affine.apply affine_map<() -> (0)>() | |
%c0_217 = arith.constant 0 : index | |
%c8_218 = arith.constant 8 : index | |
%147 = affine.apply affine_map<() -> (0)>() | |
%148 = affine.apply affine_map<() -> (0)>() | |
%c1_219 = arith.constant 1 : index | |
%c8_220 = arith.constant 8 : index | |
%c8_221 = arith.constant 8 : index | |
%extracted_slice_222 = tensor.extract_slice %extracted_slice_214[0, %145, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%149 = tensor.empty(%c8_221) : tensor<1x?x1152xf32> | |
%unpack_223 = tensor.unpack %extracted_slice_222 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %149 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_224 = arith.constant 8 : index | |
%150 = affine.apply affine_map<() -> (0)>() | |
%151 = affine.apply affine_map<() -> (0)>() | |
%c0_225 = arith.constant 0 : index | |
%c8_226 = arith.constant 8 : index | |
%152 = affine.apply affine_map<() -> (0)>() | |
%153 = affine.apply affine_map<() -> (0)>() | |
%c1_227 = arith.constant 1 : index | |
%c8_228 = arith.constant 8 : index | |
%c8_229 = arith.constant 8 : index | |
%extracted_slice_230 = tensor.extract_slice %extracted_slice_222[0, %150, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%154 = tensor.empty(%c8_229) : tensor<1x?x1152xf32> | |
%unpack_231 = tensor.unpack %extracted_slice_230 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %154 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_232 = arith.constant 8 : index | |
%155 = affine.apply affine_map<() -> (0)>() | |
%156 = affine.apply affine_map<() -> (0)>() | |
%c0_233 = arith.constant 0 : index | |
%c8_234 = arith.constant 8 : index | |
%157 = affine.apply affine_map<() -> (0)>() | |
%158 = affine.apply affine_map<() -> (0)>() | |
%c1_235 = arith.constant 1 : index | |
%c8_236 = arith.constant 8 : index | |
%c8_237 = arith.constant 8 : index | |
%extracted_slice_238 = tensor.extract_slice %extracted_slice_230[0, %155, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%159 = tensor.empty(%c8_237) : tensor<1x?x1152xf32> | |
%unpack_239 = tensor.unpack %extracted_slice_238 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %159 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_240 = arith.constant 8 : index | |
%160 = affine.apply affine_map<() -> (0)>() | |
%161 = affine.apply affine_map<() -> (0)>() | |
%c0_241 = arith.constant 0 : index | |
%c8_242 = arith.constant 8 : index | |
%162 = affine.apply affine_map<() -> (0)>() | |
%163 = affine.apply affine_map<() -> (0)>() | |
%c1_243 = arith.constant 1 : index | |
%c8_244 = arith.constant 8 : index | |
%c8_245 = arith.constant 8 : index | |
%extracted_slice_246 = tensor.extract_slice %extracted_slice_238[0, %160, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%164 = tensor.empty(%c8_245) : tensor<1x?x1152xf32> | |
%unpack_247 = tensor.unpack %extracted_slice_246 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %164 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_248 = arith.constant 8 : index | |
%165 = affine.apply affine_map<() -> (0)>() | |
%166 = affine.apply affine_map<() -> (0)>() | |
%c0_249 = arith.constant 0 : index | |
%c8_250 = arith.constant 8 : index | |
%167 = affine.apply affine_map<() -> (0)>() | |
%168 = affine.apply affine_map<() -> (0)>() | |
%c1_251 = arith.constant 1 : index | |
%c8_252 = arith.constant 8 : index | |
%c8_253 = arith.constant 8 : index | |
%extracted_slice_254 = tensor.extract_slice %extracted_slice_246[0, %165, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%169 = tensor.empty(%c8_253) : tensor<1x?x1152xf32> | |
%unpack_255 = tensor.unpack %extracted_slice_254 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %169 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_256 = arith.constant 8 : index | |
%170 = affine.apply affine_map<() -> (0)>() | |
%171 = affine.apply affine_map<() -> (0)>() | |
%c0_257 = arith.constant 0 : index | |
%c8_258 = arith.constant 8 : index | |
%172 = affine.apply affine_map<() -> (0)>() | |
%173 = affine.apply affine_map<() -> (0)>() | |
%c1_259 = arith.constant 1 : index | |
%c8_260 = arith.constant 8 : index | |
%c8_261 = arith.constant 8 : index | |
%extracted_slice_262 = tensor.extract_slice %extracted_slice_254[0, %170, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%174 = tensor.empty(%c8_261) : tensor<1x?x1152xf32> | |
%unpack_263 = tensor.unpack %extracted_slice_262 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %174 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_264 = tensor.extract_slice %unpack_263[0, %171, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_265 = tensor.extract_slice %unpack_255[0, %166, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_266 = tensor.extract_slice %unpack_247[0, %161, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_267 = tensor.extract_slice %unpack_239[0, %156, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_268 = tensor.extract_slice %unpack_231[0, %151, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_269 = tensor.extract_slice %unpack_223[0, %146, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_270 = tensor.extract_slice %unpack_215[0, %141, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_271 = tensor.extract_slice %unpack_207[0, %136, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_272 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_273 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_274 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_275 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_276 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_277 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_278 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_279 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_280 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_281 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_282 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_283 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_284 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_285 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_286 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_287 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_288 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_289 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_290 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_291 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_292 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_293 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_294 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_295 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_296 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_297 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%175 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_298 = tensor.extract_slice %175[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_299 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%176 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_264 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_299 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_300: f32, %out: f32): | |
%177 = arith.addf %in, %in_300 : f32 | |
linalg.yield %177 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %176 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_208 = arith.constant 8 : index | |
%140 = affine.apply affine_map<() -> (0)>() | |
%141 = affine.apply affine_map<() -> (0)>() | |
%c0_209 = arith.constant 0 : index | |
%c8_210 = arith.constant 8 : index | |
%142 = affine.apply affine_map<() -> (0)>() | |
%143 = affine.apply affine_map<() -> (0)>() | |
%c1_211 = arith.constant 1 : index | |
%c8_212 = arith.constant 8 : index | |
%c8_213 = arith.constant 8 : index | |
%extracted_slice_214 = tensor.extract_slice %extracted_slice_206[0, %140, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%144 = tensor.empty(%c8_213) : tensor<1x?x1152xf32> | |
%unpack_215 = tensor.unpack %extracted_slice_214 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %144 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_216 = arith.constant 8 : index | |
%145 = affine.apply affine_map<() -> (0)>() | |
%146 = affine.apply affine_map<() -> (0)>() | |
%c0_217 = arith.constant 0 : index | |
%c8_218 = arith.constant 8 : index | |
%147 = affine.apply affine_map<() -> (0)>() | |
%148 = affine.apply affine_map<() -> (0)>() | |
%c1_219 = arith.constant 1 : index | |
%c8_220 = arith.constant 8 : index | |
%c8_221 = arith.constant 8 : index | |
%extracted_slice_222 = tensor.extract_slice %extracted_slice_214[0, %145, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%149 = tensor.empty(%c8_221) : tensor<1x?x1152xf32> | |
%unpack_223 = tensor.unpack %extracted_slice_222 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %149 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_224 = arith.constant 8 : index | |
%150 = affine.apply affine_map<() -> (0)>() | |
%151 = affine.apply affine_map<() -> (0)>() | |
%c0_225 = arith.constant 0 : index | |
%c8_226 = arith.constant 8 : index | |
%152 = affine.apply affine_map<() -> (0)>() | |
%153 = affine.apply affine_map<() -> (0)>() | |
%c1_227 = arith.constant 1 : index | |
%c8_228 = arith.constant 8 : index | |
%c8_229 = arith.constant 8 : index | |
%extracted_slice_230 = tensor.extract_slice %extracted_slice_222[0, %150, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%154 = tensor.empty(%c8_229) : tensor<1x?x1152xf32> | |
%unpack_231 = tensor.unpack %extracted_slice_230 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %154 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_232 = arith.constant 8 : index | |
%155 = affine.apply affine_map<() -> (0)>() | |
%156 = affine.apply affine_map<() -> (0)>() | |
%c0_233 = arith.constant 0 : index | |
%c8_234 = arith.constant 8 : index | |
%157 = affine.apply affine_map<() -> (0)>() | |
%158 = affine.apply affine_map<() -> (0)>() | |
%c1_235 = arith.constant 1 : index | |
%c8_236 = arith.constant 8 : index | |
%c8_237 = arith.constant 8 : index | |
%extracted_slice_238 = tensor.extract_slice %extracted_slice_230[0, %155, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%159 = tensor.empty(%c8_237) : tensor<1x?x1152xf32> | |
%unpack_239 = tensor.unpack %extracted_slice_238 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %159 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_240 = arith.constant 8 : index | |
%160 = affine.apply affine_map<() -> (0)>() | |
%161 = affine.apply affine_map<() -> (0)>() | |
%c0_241 = arith.constant 0 : index | |
%c8_242 = arith.constant 8 : index | |
%162 = affine.apply affine_map<() -> (0)>() | |
%163 = affine.apply affine_map<() -> (0)>() | |
%c1_243 = arith.constant 1 : index | |
%c8_244 = arith.constant 8 : index | |
%c8_245 = arith.constant 8 : index | |
%extracted_slice_246 = tensor.extract_slice %extracted_slice_238[0, %160, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%164 = tensor.empty(%c8_245) : tensor<1x?x1152xf32> | |
%unpack_247 = tensor.unpack %extracted_slice_246 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %164 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_248 = arith.constant 8 : index | |
%165 = affine.apply affine_map<() -> (0)>() | |
%166 = affine.apply affine_map<() -> (0)>() | |
%c0_249 = arith.constant 0 : index | |
%c8_250 = arith.constant 8 : index | |
%167 = affine.apply affine_map<() -> (0)>() | |
%168 = affine.apply affine_map<() -> (0)>() | |
%c1_251 = arith.constant 1 : index | |
%c8_252 = arith.constant 8 : index | |
%c8_253 = arith.constant 8 : index | |
%extracted_slice_254 = tensor.extract_slice %extracted_slice_246[0, %165, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%169 = tensor.empty(%c8_253) : tensor<1x?x1152xf32> | |
%unpack_255 = tensor.unpack %extracted_slice_254 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %169 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_256 = arith.constant 8 : index | |
%170 = affine.apply affine_map<() -> (0)>() | |
%171 = affine.apply affine_map<() -> (0)>() | |
%c0_257 = arith.constant 0 : index | |
%c8_258 = arith.constant 8 : index | |
%172 = affine.apply affine_map<() -> (0)>() | |
%173 = affine.apply affine_map<() -> (0)>() | |
%c1_259 = arith.constant 1 : index | |
%c8_260 = arith.constant 8 : index | |
%c8_261 = arith.constant 8 : index | |
%extracted_slice_262 = tensor.extract_slice %extracted_slice_254[0, %170, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%174 = tensor.empty(%c8_261) : tensor<1x?x1152xf32> | |
%unpack_263 = tensor.unpack %extracted_slice_262 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %174 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_264 = arith.constant 8 : index | |
%175 = affine.apply affine_map<() -> (0)>() | |
%176 = affine.apply affine_map<() -> (0)>() | |
%c0_265 = arith.constant 0 : index | |
%c8_266 = arith.constant 8 : index | |
%177 = affine.apply affine_map<() -> (0)>() | |
%178 = affine.apply affine_map<() -> (0)>() | |
%c1_267 = arith.constant 1 : index | |
%c8_268 = arith.constant 8 : index | |
%c8_269 = arith.constant 8 : index | |
%extracted_slice_270 = tensor.extract_slice %extracted_slice_262[0, %175, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%179 = tensor.empty(%c8_269) : tensor<1x?x1152xf32> | |
%unpack_271 = tensor.unpack %extracted_slice_270 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %179 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_272 = tensor.extract_slice %unpack_271[0, %176, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_273 = tensor.extract_slice %unpack_263[0, %171, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_274 = tensor.extract_slice %unpack_255[0, %166, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_275 = tensor.extract_slice %unpack_247[0, %161, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_276 = tensor.extract_slice %unpack_239[0, %156, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_277 = tensor.extract_slice %unpack_231[0, %151, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_278 = tensor.extract_slice %unpack_223[0, %146, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_279 = tensor.extract_slice %unpack_215[0, %141, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_280 = tensor.extract_slice %unpack_207[0, %136, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_281 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_282 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_283 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_284 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_285 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_286 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_287 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_288 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_289 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_290 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_291 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_292 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_293 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_294 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_295 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_296 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_297 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_298 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_299 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_300 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_301 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_302 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_303 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_304 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_305 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_306 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%180 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_307 = tensor.extract_slice %180[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_308 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%181 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_272 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_308 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_309: f32, %out: f32): | |
%182 = arith.addf %in, %in_309 : f32 | |
linalg.yield %182 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %181 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_208 = arith.constant 8 : index | |
%140 = affine.apply affine_map<() -> (0)>() | |
%141 = affine.apply affine_map<() -> (0)>() | |
%c0_209 = arith.constant 0 : index | |
%c8_210 = arith.constant 8 : index | |
%142 = affine.apply affine_map<() -> (0)>() | |
%143 = affine.apply affine_map<() -> (0)>() | |
%c1_211 = arith.constant 1 : index | |
%c8_212 = arith.constant 8 : index | |
%c8_213 = arith.constant 8 : index | |
%extracted_slice_214 = tensor.extract_slice %extracted_slice_206[0, %140, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%144 = tensor.empty(%c8_213) : tensor<1x?x1152xf32> | |
%unpack_215 = tensor.unpack %extracted_slice_214 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %144 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_216 = arith.constant 8 : index | |
%145 = affine.apply affine_map<() -> (0)>() | |
%146 = affine.apply affine_map<() -> (0)>() | |
%c0_217 = arith.constant 0 : index | |
%c8_218 = arith.constant 8 : index | |
%147 = affine.apply affine_map<() -> (0)>() | |
%148 = affine.apply affine_map<() -> (0)>() | |
%c1_219 = arith.constant 1 : index | |
%c8_220 = arith.constant 8 : index | |
%c8_221 = arith.constant 8 : index | |
%extracted_slice_222 = tensor.extract_slice %extracted_slice_214[0, %145, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%149 = tensor.empty(%c8_221) : tensor<1x?x1152xf32> | |
%unpack_223 = tensor.unpack %extracted_slice_222 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %149 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_224 = arith.constant 8 : index | |
%150 = affine.apply affine_map<() -> (0)>() | |
%151 = affine.apply affine_map<() -> (0)>() | |
%c0_225 = arith.constant 0 : index | |
%c8_226 = arith.constant 8 : index | |
%152 = affine.apply affine_map<() -> (0)>() | |
%153 = affine.apply affine_map<() -> (0)>() | |
%c1_227 = arith.constant 1 : index | |
%c8_228 = arith.constant 8 : index | |
%c8_229 = arith.constant 8 : index | |
%extracted_slice_230 = tensor.extract_slice %extracted_slice_222[0, %150, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%154 = tensor.empty(%c8_229) : tensor<1x?x1152xf32> | |
%unpack_231 = tensor.unpack %extracted_slice_230 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %154 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_232 = arith.constant 8 : index | |
%155 = affine.apply affine_map<() -> (0)>() | |
%156 = affine.apply affine_map<() -> (0)>() | |
%c0_233 = arith.constant 0 : index | |
%c8_234 = arith.constant 8 : index | |
%157 = affine.apply affine_map<() -> (0)>() | |
%158 = affine.apply affine_map<() -> (0)>() | |
%c1_235 = arith.constant 1 : index | |
%c8_236 = arith.constant 8 : index | |
%c8_237 = arith.constant 8 : index | |
%extracted_slice_238 = tensor.extract_slice %extracted_slice_230[0, %155, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%159 = tensor.empty(%c8_237) : tensor<1x?x1152xf32> | |
%unpack_239 = tensor.unpack %extracted_slice_238 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %159 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_240 = arith.constant 8 : index | |
%160 = affine.apply affine_map<() -> (0)>() | |
%161 = affine.apply affine_map<() -> (0)>() | |
%c0_241 = arith.constant 0 : index | |
%c8_242 = arith.constant 8 : index | |
%162 = affine.apply affine_map<() -> (0)>() | |
%163 = affine.apply affine_map<() -> (0)>() | |
%c1_243 = arith.constant 1 : index | |
%c8_244 = arith.constant 8 : index | |
%c8_245 = arith.constant 8 : index | |
%extracted_slice_246 = tensor.extract_slice %extracted_slice_238[0, %160, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%164 = tensor.empty(%c8_245) : tensor<1x?x1152xf32> | |
%unpack_247 = tensor.unpack %extracted_slice_246 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %164 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_248 = arith.constant 8 : index | |
%165 = affine.apply affine_map<() -> (0)>() | |
%166 = affine.apply affine_map<() -> (0)>() | |
%c0_249 = arith.constant 0 : index | |
%c8_250 = arith.constant 8 : index | |
%167 = affine.apply affine_map<() -> (0)>() | |
%168 = affine.apply affine_map<() -> (0)>() | |
%c1_251 = arith.constant 1 : index | |
%c8_252 = arith.constant 8 : index | |
%c8_253 = arith.constant 8 : index | |
%extracted_slice_254 = tensor.extract_slice %extracted_slice_246[0, %165, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%169 = tensor.empty(%c8_253) : tensor<1x?x1152xf32> | |
%unpack_255 = tensor.unpack %extracted_slice_254 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %169 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_256 = arith.constant 8 : index | |
%170 = affine.apply affine_map<() -> (0)>() | |
%171 = affine.apply affine_map<() -> (0)>() | |
%c0_257 = arith.constant 0 : index | |
%c8_258 = arith.constant 8 : index | |
%172 = affine.apply affine_map<() -> (0)>() | |
%173 = affine.apply affine_map<() -> (0)>() | |
%c1_259 = arith.constant 1 : index | |
%c8_260 = arith.constant 8 : index | |
%c8_261 = arith.constant 8 : index | |
%extracted_slice_262 = tensor.extract_slice %extracted_slice_254[0, %170, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%174 = tensor.empty(%c8_261) : tensor<1x?x1152xf32> | |
%unpack_263 = tensor.unpack %extracted_slice_262 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %174 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_264 = arith.constant 8 : index | |
%175 = affine.apply affine_map<() -> (0)>() | |
%176 = affine.apply affine_map<() -> (0)>() | |
%c0_265 = arith.constant 0 : index | |
%c8_266 = arith.constant 8 : index | |
%177 = affine.apply affine_map<() -> (0)>() | |
%178 = affine.apply affine_map<() -> (0)>() | |
%c1_267 = arith.constant 1 : index | |
%c8_268 = arith.constant 8 : index | |
%c8_269 = arith.constant 8 : index | |
%extracted_slice_270 = tensor.extract_slice %extracted_slice_262[0, %175, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%179 = tensor.empty(%c8_269) : tensor<1x?x1152xf32> | |
%unpack_271 = tensor.unpack %extracted_slice_270 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %179 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_272 = arith.constant 8 : index | |
%180 = affine.apply affine_map<() -> (0)>() | |
%181 = affine.apply affine_map<() -> (0)>() | |
%c0_273 = arith.constant 0 : index | |
%c8_274 = arith.constant 8 : index | |
%182 = affine.apply affine_map<() -> (0)>() | |
%183 = affine.apply affine_map<() -> (0)>() | |
%c1_275 = arith.constant 1 : index | |
%c8_276 = arith.constant 8 : index | |
%c8_277 = arith.constant 8 : index | |
%extracted_slice_278 = tensor.extract_slice %extracted_slice_270[0, %180, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%184 = tensor.empty(%c8_277) : tensor<1x?x1152xf32> | |
%unpack_279 = tensor.unpack %extracted_slice_278 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %184 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%extracted_slice_280 = tensor.extract_slice %unpack_279[0, %181, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_281 = tensor.extract_slice %unpack_271[0, %176, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_282 = tensor.extract_slice %unpack_263[0, %171, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_283 = tensor.extract_slice %unpack_255[0, %166, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_284 = tensor.extract_slice %unpack_247[0, %161, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_285 = tensor.extract_slice %unpack_239[0, %156, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_286 = tensor.extract_slice %unpack_231[0, %151, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_287 = tensor.extract_slice %unpack_223[0, %146, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_288 = tensor.extract_slice %unpack_215[0, %141, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_289 = tensor.extract_slice %unpack_207[0, %136, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_290 = tensor.extract_slice %unpack_199[0, %131, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_291 = tensor.extract_slice %unpack_191[0, %126, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_292 = tensor.extract_slice %unpack_183[0, %121, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_293 = tensor.extract_slice %unpack_175[0, %116, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_294 = tensor.extract_slice %unpack_167[0, %111, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_295 = tensor.extract_slice %unpack_159[0, %106, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_296 = tensor.extract_slice %unpack_151[0, %101, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_297 = tensor.extract_slice %unpack_143[0, %96, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_298 = tensor.extract_slice %unpack_135[0, %91, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_299 = tensor.extract_slice %unpack_127[0, %86, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_300 = tensor.extract_slice %unpack_119[0, %81, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_301 = tensor.extract_slice %unpack_111[0, %76, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_302 = tensor.extract_slice %unpack_103[0, %71, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_303 = tensor.extract_slice %unpack_95[0, %66, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_304 = tensor.extract_slice %unpack_87[0, %61, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_305 = tensor.extract_slice %unpack_79[0, %56, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_306 = tensor.extract_slice %unpack_71[0, %51, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_307 = tensor.extract_slice %unpack_63[0, %46, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_308 = tensor.extract_slice %unpack_55[0, %41, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_309 = tensor.extract_slice %unpack_47[0, %36, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_310 = tensor.extract_slice %unpack_39[0, %31, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_311 = tensor.extract_slice %unpack_31[0, %26, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_312 = tensor.extract_slice %unpack_23[0, %21, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_313 = tensor.extract_slice %unpack_15[0, %16, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_314 = tensor.extract_slice %unpack_7[0, %10, 0] [1, 1, 1152] [1, 1, 1] : tensor<1x?x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_315 = tensor.extract_slice %unpack[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%185 = tensor.empty() : tensor<1x1x1152xf32> | |
%extracted_slice_316 = tensor.extract_slice %185[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_317 = tensor.extract_slice %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%186 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%extracted_slice_0, %extracted_slice_280 : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%extracted_slice_317 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_318: f32, %out: f32): | |
%187 = arith.addf %in, %in_318 : f32 | |
linalg.yield %187 : f32 | |
} -> tensor<1x1x1152xf32> | |
scf.forall.in_parallel { | |
tensor.parallel_insert_slice %186 into %arg1[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> into tensor<1x1x1152xf32> | |
} | |
} {mapping = [#iree_codegen.workgroup_mapping<x>]} | |
flow.dispatch.tensor.store %6, %2, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : tensor<1x1x1152xf32> -> !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
return | |
} | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
Inserting anonymous constraint set column 0 | |
Processing value bounds worklist... | |
func.func @time_out_dispatch_0_unpack_elementwise_1x1x1152_f32() attributes {translation_info = #iree_codegen.translation_info<CPUDoubleTilingExpert>} { | |
%c0 = arith.constant 0 : index | |
%0 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> | |
%1 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> | |
%2 = hal.interface.binding.subspan layout(<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<1x1x1152xf32>> | |
%3 = flow.dispatch.tensor.load %0, offsets = [0, 0, 0, 0, 0], sizes = [1, 1, 288, 8, 4], strides = [1, 1, 1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x288x8x4xf32>> -> tensor<1x1x288x8x4xf32> | |
%4 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%5 = tensor.empty() : tensor<1x1x1152xf32> | |
%unpack = tensor.unpack %3 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %5 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x1x1152xf32> | |
%6 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%4, %unpack : tensor<1x1x1152xf32>, tensor<1x1x1152xf32>) outs(%5 : tensor<1x1x1152xf32>) attrs = {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} { | |
^bb0(%in: f32, %in_0: f32, %out: f32): | |
%8 = arith.addf %in, %in_0 : f32 | |
linalg.yield %8 : f32 | |
} -> tensor<1x1x1152xf32> | |
%7 = scf.forall (%arg0) = (0) to (1152) step (1152) shared_outs(%arg1 = %5) -> (tensor<1x1x1152xf32>) { | |
%8 = flow.dispatch.tensor.load %1, offsets = [0, 0, 0], sizes = [1, 1, 1152], strides = [1, 1, 1] : !flow.dispatch.tensor<readonly:tensor<1x1x1152xf32>> -> tensor<1x1x1152xf32> | |
%extracted_slice = tensor.extract_slice %8[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%extracted_slice_0 = tensor.extract_slice %4[0, 0, %arg0] [1, 1, 1152] [1, 1, 1] : tensor<1x1x1152xf32> to tensor<1x1x1152xf32> | |
%c0_1 = arith.constant 0 : index | |
%c8 = arith.constant 8 : index | |
%9 = affine.apply affine_map<() -> (0)>() | |
%10 = affine.apply affine_map<() -> (0)>() | |
%c0_2 = arith.constant 0 : index | |
%c8_3 = arith.constant 8 : index | |
%11 = affine.apply affine_map<() -> (0)>() | |
%12 = affine.apply affine_map<() -> (0)>() | |
%c1 = arith.constant 1 : index | |
%c8_4 = arith.constant 8 : index | |
%c8_5 = arith.constant 8 : index | |
%13 = affine.apply affine_map<(d0) -> (d0 floordiv 4)>(%arg0) | |
%extracted_slice_6 = tensor.extract_slice %3[0, %9, %13, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%14 = tensor.empty(%c8_5) : tensor<1x?x1152xf32> | |
%unpack_7 = tensor.unpack %extracted_slice_6 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %14 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_8 = arith.constant 8 : index | |
%15 = affine.apply affine_map<() -> (0)>() | |
%16 = affine.apply affine_map<() -> (0)>() | |
%c0_9 = arith.constant 0 : index | |
%c8_10 = arith.constant 8 : index | |
%17 = affine.apply affine_map<() -> (0)>() | |
%18 = affine.apply affine_map<() -> (0)>() | |
%c1_11 = arith.constant 1 : index | |
%c8_12 = arith.constant 8 : index | |
%c8_13 = arith.constant 8 : index | |
%extracted_slice_14 = tensor.extract_slice %extracted_slice_6[0, %15, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%19 = tensor.empty(%c8_13) : tensor<1x?x1152xf32> | |
%unpack_15 = tensor.unpack %extracted_slice_14 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %19 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_16 = arith.constant 8 : index | |
%20 = affine.apply affine_map<() -> (0)>() | |
%21 = affine.apply affine_map<() -> (0)>() | |
%c0_17 = arith.constant 0 : index | |
%c8_18 = arith.constant 8 : index | |
%22 = affine.apply affine_map<() -> (0)>() | |
%23 = affine.apply affine_map<() -> (0)>() | |
%c1_19 = arith.constant 1 : index | |
%c8_20 = arith.constant 8 : index | |
%c8_21 = arith.constant 8 : index | |
%extracted_slice_22 = tensor.extract_slice %extracted_slice_14[0, %20, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%24 = tensor.empty(%c8_21) : tensor<1x?x1152xf32> | |
%unpack_23 = tensor.unpack %extracted_slice_22 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %24 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_24 = arith.constant 8 : index | |
%25 = affine.apply affine_map<() -> (0)>() | |
%26 = affine.apply affine_map<() -> (0)>() | |
%c0_25 = arith.constant 0 : index | |
%c8_26 = arith.constant 8 : index | |
%27 = affine.apply affine_map<() -> (0)>() | |
%28 = affine.apply affine_map<() -> (0)>() | |
%c1_27 = arith.constant 1 : index | |
%c8_28 = arith.constant 8 : index | |
%c8_29 = arith.constant 8 : index | |
%extracted_slice_30 = tensor.extract_slice %extracted_slice_22[0, %25, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%29 = tensor.empty(%c8_29) : tensor<1x?x1152xf32> | |
%unpack_31 = tensor.unpack %extracted_slice_30 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %29 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_32 = arith.constant 8 : index | |
%30 = affine.apply affine_map<() -> (0)>() | |
%31 = affine.apply affine_map<() -> (0)>() | |
%c0_33 = arith.constant 0 : index | |
%c8_34 = arith.constant 8 : index | |
%32 = affine.apply affine_map<() -> (0)>() | |
%33 = affine.apply affine_map<() -> (0)>() | |
%c1_35 = arith.constant 1 : index | |
%c8_36 = arith.constant 8 : index | |
%c8_37 = arith.constant 8 : index | |
%extracted_slice_38 = tensor.extract_slice %extracted_slice_30[0, %30, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%34 = tensor.empty(%c8_37) : tensor<1x?x1152xf32> | |
%unpack_39 = tensor.unpack %extracted_slice_38 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %34 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_40 = arith.constant 8 : index | |
%35 = affine.apply affine_map<() -> (0)>() | |
%36 = affine.apply affine_map<() -> (0)>() | |
%c0_41 = arith.constant 0 : index | |
%c8_42 = arith.constant 8 : index | |
%37 = affine.apply affine_map<() -> (0)>() | |
%38 = affine.apply affine_map<() -> (0)>() | |
%c1_43 = arith.constant 1 : index | |
%c8_44 = arith.constant 8 : index | |
%c8_45 = arith.constant 8 : index | |
%extracted_slice_46 = tensor.extract_slice %extracted_slice_38[0, %35, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%39 = tensor.empty(%c8_45) : tensor<1x?x1152xf32> | |
%unpack_47 = tensor.unpack %extracted_slice_46 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %39 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_48 = arith.constant 8 : index | |
%40 = affine.apply affine_map<() -> (0)>() | |
%41 = affine.apply affine_map<() -> (0)>() | |
%c0_49 = arith.constant 0 : index | |
%c8_50 = arith.constant 8 : index | |
%42 = affine.apply affine_map<() -> (0)>() | |
%43 = affine.apply affine_map<() -> (0)>() | |
%c1_51 = arith.constant 1 : index | |
%c8_52 = arith.constant 8 : index | |
%c8_53 = arith.constant 8 : index | |
%extracted_slice_54 = tensor.extract_slice %extracted_slice_46[0, %40, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%44 = tensor.empty(%c8_53) : tensor<1x?x1152xf32> | |
%unpack_55 = tensor.unpack %extracted_slice_54 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %44 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_56 = arith.constant 8 : index | |
%45 = affine.apply affine_map<() -> (0)>() | |
%46 = affine.apply affine_map<() -> (0)>() | |
%c0_57 = arith.constant 0 : index | |
%c8_58 = arith.constant 8 : index | |
%47 = affine.apply affine_map<() -> (0)>() | |
%48 = affine.apply affine_map<() -> (0)>() | |
%c1_59 = arith.constant 1 : index | |
%c8_60 = arith.constant 8 : index | |
%c8_61 = arith.constant 8 : index | |
%extracted_slice_62 = tensor.extract_slice %extracted_slice_54[0, %45, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%49 = tensor.empty(%c8_61) : tensor<1x?x1152xf32> | |
%unpack_63 = tensor.unpack %extracted_slice_62 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %49 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_64 = arith.constant 8 : index | |
%50 = affine.apply affine_map<() -> (0)>() | |
%51 = affine.apply affine_map<() -> (0)>() | |
%c0_65 = arith.constant 0 : index | |
%c8_66 = arith.constant 8 : index | |
%52 = affine.apply affine_map<() -> (0)>() | |
%53 = affine.apply affine_map<() -> (0)>() | |
%c1_67 = arith.constant 1 : index | |
%c8_68 = arith.constant 8 : index | |
%c8_69 = arith.constant 8 : index | |
%extracted_slice_70 = tensor.extract_slice %extracted_slice_62[0, %50, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%54 = tensor.empty(%c8_69) : tensor<1x?x1152xf32> | |
%unpack_71 = tensor.unpack %extracted_slice_70 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %54 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_72 = arith.constant 8 : index | |
%55 = affine.apply affine_map<() -> (0)>() | |
%56 = affine.apply affine_map<() -> (0)>() | |
%c0_73 = arith.constant 0 : index | |
%c8_74 = arith.constant 8 : index | |
%57 = affine.apply affine_map<() -> (0)>() | |
%58 = affine.apply affine_map<() -> (0)>() | |
%c1_75 = arith.constant 1 : index | |
%c8_76 = arith.constant 8 : index | |
%c8_77 = arith.constant 8 : index | |
%extracted_slice_78 = tensor.extract_slice %extracted_slice_70[0, %55, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%59 = tensor.empty(%c8_77) : tensor<1x?x1152xf32> | |
%unpack_79 = tensor.unpack %extracted_slice_78 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %59 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_80 = arith.constant 8 : index | |
%60 = affine.apply affine_map<() -> (0)>() | |
%61 = affine.apply affine_map<() -> (0)>() | |
%c0_81 = arith.constant 0 : index | |
%c8_82 = arith.constant 8 : index | |
%62 = affine.apply affine_map<() -> (0)>() | |
%63 = affine.apply affine_map<() -> (0)>() | |
%c1_83 = arith.constant 1 : index | |
%c8_84 = arith.constant 8 : index | |
%c8_85 = arith.constant 8 : index | |
%extracted_slice_86 = tensor.extract_slice %extracted_slice_78[0, %60, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%64 = tensor.empty(%c8_85) : tensor<1x?x1152xf32> | |
%unpack_87 = tensor.unpack %extracted_slice_86 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %64 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_88 = arith.constant 8 : index | |
%65 = affine.apply affine_map<() -> (0)>() | |
%66 = affine.apply affine_map<() -> (0)>() | |
%c0_89 = arith.constant 0 : index | |
%c8_90 = arith.constant 8 : index | |
%67 = affine.apply affine_map<() -> (0)>() | |
%68 = affine.apply affine_map<() -> (0)>() | |
%c1_91 = arith.constant 1 : index | |
%c8_92 = arith.constant 8 : index | |
%c8_93 = arith.constant 8 : index | |
%extracted_slice_94 = tensor.extract_slice %extracted_slice_86[0, %65, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%69 = tensor.empty(%c8_93) : tensor<1x?x1152xf32> | |
%unpack_95 = tensor.unpack %extracted_slice_94 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %69 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_96 = arith.constant 8 : index | |
%70 = affine.apply affine_map<() -> (0)>() | |
%71 = affine.apply affine_map<() -> (0)>() | |
%c0_97 = arith.constant 0 : index | |
%c8_98 = arith.constant 8 : index | |
%72 = affine.apply affine_map<() -> (0)>() | |
%73 = affine.apply affine_map<() -> (0)>() | |
%c1_99 = arith.constant 1 : index | |
%c8_100 = arith.constant 8 : index | |
%c8_101 = arith.constant 8 : index | |
%extracted_slice_102 = tensor.extract_slice %extracted_slice_94[0, %70, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%74 = tensor.empty(%c8_101) : tensor<1x?x1152xf32> | |
%unpack_103 = tensor.unpack %extracted_slice_102 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %74 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_104 = arith.constant 8 : index | |
%75 = affine.apply affine_map<() -> (0)>() | |
%76 = affine.apply affine_map<() -> (0)>() | |
%c0_105 = arith.constant 0 : index | |
%c8_106 = arith.constant 8 : index | |
%77 = affine.apply affine_map<() -> (0)>() | |
%78 = affine.apply affine_map<() -> (0)>() | |
%c1_107 = arith.constant 1 : index | |
%c8_108 = arith.constant 8 : index | |
%c8_109 = arith.constant 8 : index | |
%extracted_slice_110 = tensor.extract_slice %extracted_slice_102[0, %75, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%79 = tensor.empty(%c8_109) : tensor<1x?x1152xf32> | |
%unpack_111 = tensor.unpack %extracted_slice_110 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %79 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_112 = arith.constant 8 : index | |
%80 = affine.apply affine_map<() -> (0)>() | |
%81 = affine.apply affine_map<() -> (0)>() | |
%c0_113 = arith.constant 0 : index | |
%c8_114 = arith.constant 8 : index | |
%82 = affine.apply affine_map<() -> (0)>() | |
%83 = affine.apply affine_map<() -> (0)>() | |
%c1_115 = arith.constant 1 : index | |
%c8_116 = arith.constant 8 : index | |
%c8_117 = arith.constant 8 : index | |
%extracted_slice_118 = tensor.extract_slice %extracted_slice_110[0, %80, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%84 = tensor.empty(%c8_117) : tensor<1x?x1152xf32> | |
%unpack_119 = tensor.unpack %extracted_slice_118 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %84 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_120 = arith.constant 8 : index | |
%85 = affine.apply affine_map<() -> (0)>() | |
%86 = affine.apply affine_map<() -> (0)>() | |
%c0_121 = arith.constant 0 : index | |
%c8_122 = arith.constant 8 : index | |
%87 = affine.apply affine_map<() -> (0)>() | |
%88 = affine.apply affine_map<() -> (0)>() | |
%c1_123 = arith.constant 1 : index | |
%c8_124 = arith.constant 8 : index | |
%c8_125 = arith.constant 8 : index | |
%extracted_slice_126 = tensor.extract_slice %extracted_slice_118[0, %85, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%89 = tensor.empty(%c8_125) : tensor<1x?x1152xf32> | |
%unpack_127 = tensor.unpack %extracted_slice_126 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %89 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_128 = arith.constant 8 : index | |
%90 = affine.apply affine_map<() -> (0)>() | |
%91 = affine.apply affine_map<() -> (0)>() | |
%c0_129 = arith.constant 0 : index | |
%c8_130 = arith.constant 8 : index | |
%92 = affine.apply affine_map<() -> (0)>() | |
%93 = affine.apply affine_map<() -> (0)>() | |
%c1_131 = arith.constant 1 : index | |
%c8_132 = arith.constant 8 : index | |
%c8_133 = arith.constant 8 : index | |
%extracted_slice_134 = tensor.extract_slice %extracted_slice_126[0, %90, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%94 = tensor.empty(%c8_133) : tensor<1x?x1152xf32> | |
%unpack_135 = tensor.unpack %extracted_slice_134 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %94 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_136 = arith.constant 8 : index | |
%95 = affine.apply affine_map<() -> (0)>() | |
%96 = affine.apply affine_map<() -> (0)>() | |
%c0_137 = arith.constant 0 : index | |
%c8_138 = arith.constant 8 : index | |
%97 = affine.apply affine_map<() -> (0)>() | |
%98 = affine.apply affine_map<() -> (0)>() | |
%c1_139 = arith.constant 1 : index | |
%c8_140 = arith.constant 8 : index | |
%c8_141 = arith.constant 8 : index | |
%extracted_slice_142 = tensor.extract_slice %extracted_slice_134[0, %95, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%99 = tensor.empty(%c8_141) : tensor<1x?x1152xf32> | |
%unpack_143 = tensor.unpack %extracted_slice_142 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %99 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_144 = arith.constant 8 : index | |
%100 = affine.apply affine_map<() -> (0)>() | |
%101 = affine.apply affine_map<() -> (0)>() | |
%c0_145 = arith.constant 0 : index | |
%c8_146 = arith.constant 8 : index | |
%102 = affine.apply affine_map<() -> (0)>() | |
%103 = affine.apply affine_map<() -> (0)>() | |
%c1_147 = arith.constant 1 : index | |
%c8_148 = arith.constant 8 : index | |
%c8_149 = arith.constant 8 : index | |
%extracted_slice_150 = tensor.extract_slice %extracted_slice_142[0, %100, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%104 = tensor.empty(%c8_149) : tensor<1x?x1152xf32> | |
%unpack_151 = tensor.unpack %extracted_slice_150 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %104 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_152 = arith.constant 8 : index | |
%105 = affine.apply affine_map<() -> (0)>() | |
%106 = affine.apply affine_map<() -> (0)>() | |
%c0_153 = arith.constant 0 : index | |
%c8_154 = arith.constant 8 : index | |
%107 = affine.apply affine_map<() -> (0)>() | |
%108 = affine.apply affine_map<() -> (0)>() | |
%c1_155 = arith.constant 1 : index | |
%c8_156 = arith.constant 8 : index | |
%c8_157 = arith.constant 8 : index | |
%extracted_slice_158 = tensor.extract_slice %extracted_slice_150[0, %105, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%109 = tensor.empty(%c8_157) : tensor<1x?x1152xf32> | |
%unpack_159 = tensor.unpack %extracted_slice_158 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %109 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_160 = arith.constant 8 : index | |
%110 = affine.apply affine_map<() -> (0)>() | |
%111 = affine.apply affine_map<() -> (0)>() | |
%c0_161 = arith.constant 0 : index | |
%c8_162 = arith.constant 8 : index | |
%112 = affine.apply affine_map<() -> (0)>() | |
%113 = affine.apply affine_map<() -> (0)>() | |
%c1_163 = arith.constant 1 : index | |
%c8_164 = arith.constant 8 : index | |
%c8_165 = arith.constant 8 : index | |
%extracted_slice_166 = tensor.extract_slice %extracted_slice_158[0, %110, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%114 = tensor.empty(%c8_165) : tensor<1x?x1152xf32> | |
%unpack_167 = tensor.unpack %extracted_slice_166 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %114 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_168 = arith.constant 8 : index | |
%115 = affine.apply affine_map<() -> (0)>() | |
%116 = affine.apply affine_map<() -> (0)>() | |
%c0_169 = arith.constant 0 : index | |
%c8_170 = arith.constant 8 : index | |
%117 = affine.apply affine_map<() -> (0)>() | |
%118 = affine.apply affine_map<() -> (0)>() | |
%c1_171 = arith.constant 1 : index | |
%c8_172 = arith.constant 8 : index | |
%c8_173 = arith.constant 8 : index | |
%extracted_slice_174 = tensor.extract_slice %extracted_slice_166[0, %115, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%119 = tensor.empty(%c8_173) : tensor<1x?x1152xf32> | |
%unpack_175 = tensor.unpack %extracted_slice_174 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %119 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_176 = arith.constant 8 : index | |
%120 = affine.apply affine_map<() -> (0)>() | |
%121 = affine.apply affine_map<() -> (0)>() | |
%c0_177 = arith.constant 0 : index | |
%c8_178 = arith.constant 8 : index | |
%122 = affine.apply affine_map<() -> (0)>() | |
%123 = affine.apply affine_map<() -> (0)>() | |
%c1_179 = arith.constant 1 : index | |
%c8_180 = arith.constant 8 : index | |
%c8_181 = arith.constant 8 : index | |
%extracted_slice_182 = tensor.extract_slice %extracted_slice_174[0, %120, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%124 = tensor.empty(%c8_181) : tensor<1x?x1152xf32> | |
%unpack_183 = tensor.unpack %extracted_slice_182 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %124 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_184 = arith.constant 8 : index | |
%125 = affine.apply affine_map<() -> (0)>() | |
%126 = affine.apply affine_map<() -> (0)>() | |
%c0_185 = arith.constant 0 : index | |
%c8_186 = arith.constant 8 : index | |
%127 = affine.apply affine_map<() -> (0)>() | |
%128 = affine.apply affine_map<() -> (0)>() | |
%c1_187 = arith.constant 1 : index | |
%c8_188 = arith.constant 8 : index | |
%c8_189 = arith.constant 8 : index | |
%extracted_slice_190 = tensor.extract_slice %extracted_slice_182[0, %125, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%129 = tensor.empty(%c8_189) : tensor<1x?x1152xf32> | |
%unpack_191 = tensor.unpack %extracted_slice_190 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %129 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_192 = arith.constant 8 : index | |
%130 = affine.apply affine_map<() -> (0)>() | |
%131 = affine.apply affine_map<() -> (0)>() | |
%c0_193 = arith.constant 0 : index | |
%c8_194 = arith.constant 8 : index | |
%132 = affine.apply affine_map<() -> (0)>() | |
%133 = affine.apply affine_map<() -> (0)>() | |
%c1_195 = arith.constant 1 : index | |
%c8_196 = arith.constant 8 : index | |
%c8_197 = arith.constant 8 : index | |
%extracted_slice_198 = tensor.extract_slice %extracted_slice_190[0, %130, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%134 = tensor.empty(%c8_197) : tensor<1x?x1152xf32> | |
%unpack_199 = tensor.unpack %extracted_slice_198 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %134 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_200 = arith.constant 8 : index | |
%135 = affine.apply affine_map<() -> (0)>() | |
%136 = affine.apply affine_map<() -> (0)>() | |
%c0_201 = arith.constant 0 : index | |
%c8_202 = arith.constant 8 : index | |
%137 = affine.apply affine_map<() -> (0)>() | |
%138 = affine.apply affine_map<() -> (0)>() | |
%c1_203 = arith.constant 1 : index | |
%c8_204 = arith.constant 8 : index | |
%c8_205 = arith.constant 8 : index | |
%extracted_slice_206 = tensor.extract_slice %extracted_slice_198[0, %135, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%139 = tensor.empty(%c8_205) : tensor<1x?x1152xf32> | |
%unpack_207 = tensor.unpack %extracted_slice_206 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %139 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_208 = arith.constant 8 : index | |
%140 = affine.apply affine_map<() -> (0)>() | |
%141 = affine.apply affine_map<() -> (0)>() | |
%c0_209 = arith.constant 0 : index | |
%c8_210 = arith.constant 8 : index | |
%142 = affine.apply affine_map<() -> (0)>() | |
%143 = affine.apply affine_map<() -> (0)>() | |
%c1_211 = arith.constant 1 : index | |
%c8_212 = arith.constant 8 : index | |
%c8_213 = arith.constant 8 : index | |
%extracted_slice_214 = tensor.extract_slice %extracted_slice_206[0, %140, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%144 = tensor.empty(%c8_213) : tensor<1x?x1152xf32> | |
%unpack_215 = tensor.unpack %extracted_slice_214 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %144 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_216 = arith.constant 8 : index | |
%145 = affine.apply affine_map<() -> (0)>() | |
%146 = affine.apply affine_map<() -> (0)>() | |
%c0_217 = arith.constant 0 : index | |
%c8_218 = arith.constant 8 : index | |
%147 = affine.apply affine_map<() -> (0)>() | |
%148 = affine.apply affine_map<() -> (0)>() | |
%c1_219 = arith.constant 1 : index | |
%c8_220 = arith.constant 8 : index | |
%c8_221 = arith.constant 8 : index | |
%extracted_slice_222 = tensor.extract_slice %extracted_slice_214[0, %145, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%149 = tensor.empty(%c8_221) : tensor<1x?x1152xf32> | |
%unpack_223 = tensor.unpack %extracted_slice_222 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %149 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_224 = arith.constant 8 : index | |
%150 = affine.apply affine_map<() -> (0)>() | |
%151 = affine.apply affine_map<() -> (0)>() | |
%c0_225 = arith.constant 0 : index | |
%c8_226 = arith.constant 8 : index | |
%152 = affine.apply affine_map<() -> (0)>() | |
%153 = affine.apply affine_map<() -> (0)>() | |
%c1_227 = arith.constant 1 : index | |
%c8_228 = arith.constant 8 : index | |
%c8_229 = arith.constant 8 : index | |
%extracted_slice_230 = tensor.extract_slice %extracted_slice_222[0, %150, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%154 = tensor.empty(%c8_229) : tensor<1x?x1152xf32> | |
%unpack_231 = tensor.unpack %extracted_slice_230 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %154 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_232 = arith.constant 8 : index | |
%155 = affine.apply affine_map<() -> (0)>() | |
%156 = affine.apply affine_map<() -> (0)>() | |
%c0_233 = arith.constant 0 : index | |
%c8_234 = arith.constant 8 : index | |
%157 = affine.apply affine_map<() -> (0)>() | |
%158 = affine.apply affine_map<() -> (0)>() | |
%c1_235 = arith.constant 1 : index | |
%c8_236 = arith.constant 8 : index | |
%c8_237 = arith.constant 8 : index | |
%extracted_slice_238 = tensor.extract_slice %extracted_slice_230[0, %155, 0, 0, 0] [1, 1, 288, 8, 4] [1, 1, 1, 1, 1] : tensor<1x1x288x8x4xf32> to tensor<1x1x288x8x4xf32> | |
%159 = tensor.empty(%c8_237) : tensor<1x?x1152xf32> | |
%unpack_239 = tensor.unpack %extracted_slice_238 outer_dims_perm = [0, 1, 2] inner_dims_pos = [1, 2] inner_tiles = [8, 4] into %159 {lowering_config = #iree_codegen.lowering_config<tile_sizes = [[0, 0, 1152], [1, 8, 16], [0, 0, 0], [0, 0, 0]]>} : tensor<1x1x288x8x4xf32> -> tensor<1x?x1152xf32> | |
%c8_240 = arith.constant 8 : index | |
%160 = affine.apply affine_map<() -> (0)>() | |
%161 = affine.apply affine_map<() -> (0)>() | |
%c0_241 = arith.constant 0 : index | |
%c8_242 = arith.constant 8 : index | |
%162 = affine.apply affine_map<() -> (0)>() | |
%163 = affine.apply affin |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment