Created
February 10, 2025 03:34
-
-
Save AmosLewis/3860a0371236b528b24c86c48e1e31c2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(.venv) ➜ shark-ai git:(users/dan-garvey/enable_custom_fp8_matmul) ✗ python -m sharktank.examples.paged_llm_v1 --irpa-file=/home/chi/src/test/llama/dan/fp8.irpa --tokenizer-config-json=/home/chi/src/test/llama/dan/tokenizer.json --dump-bins "t" | |
/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/iree/turbine/aot/params.py:163: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:206.) | |
return torch.from_numpy(wrapper) | |
:: Prompting: | |
b't' | |
:: Prompt tokens: tensor([[83, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]) | |
:: Invoke prefill: | |
failed to translate executables | |
<unknown>:0: error: 'llvm.fpext' op operand #0 must be floating point LLVM type or LLVM dialect-compatible vector of floating point LLVM type, but got 'vector<16xi8>' | |
<unknown>:0: note: see current operation: %292 = "llvm.fpext"(%281) : (vector<16xi8>) -> vector<16xf32> | |
source.mlir:16:10: error: failed to run translation of source executable to target executable for backend #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,-amx-fp8,+xsaves,-avx512fp16,-usermsr,-sm4,-egpr,+sse4.1,+avx512ifma,+xsave,+sse4.2,-tsxldtrk,-sm3,-ptwrite,-widekl,-movrs,+invpcid,+64bit,+xsavec,-avx10.1-512,+avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-ccmp,-amx-int8,-kl,-avx10.1-256,+evex512,-avxvnni,-rtm,+adx,+avx2,-hreset,-movdiri,-serialize,-sha512,+vpclmulqdq,+avx512vl,-uintr,-cf,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-avx10.2-256,+gfni,-avxvnniint16,-amx-fp16,-zu,-ndd,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,+avx512bf16,+avx512vnni,-push2pop2,+cx8,+avx512bw,+sse3,+pku,-nf,-amx-tf32,-amx-avx512,+fsgsbase,+clzero,+mwaitx,-lwp,+lzcnt,+sha,-movdir64b,-ppx,+wbnoinvd,-enqcmd,-amx-transpose,-avx10.2-512,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,+avx512bitalg,+rdpru,+clwb,+mmx,+sse2,+rdseed,+avx512vbmi2,-prefetchi,-amx-movrs,+rdpid,-fma4,+avx512vbmi,+shstk,+vaes,-waitpkg,-sgx,+fxsr,+avx512dq,+sse4a", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 64 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}> | |
%2 = linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : tensor<1x32x4096xf8E4M3FNUZ>, tensor<1x4096x4096xf8E4M3FNUZ>) outs(%1 : tensor<1x32x4096xf32>) -> tensor<1x32x4096xf32> | |
^ | |
source.mlir:2:3: note: called from | |
func.func @main(%arg0: tensor<1x32x4096xf8E4M3FNUZ>, %arg1: tensor<1x4096x4096xf8E4M3FNUZ>) -> tensor<1x32x4096xf32> { | |
^ | |
source.mlir:16:10: note: see current operation: | |
"hal.executable.variant"() ({ | |
"hal.executable.export"() ({ | |
^bb0(%arg3: !hal.device): | |
%567 = "arith.constant"() <{value = 256 : index}> : () -> index | |
%568 = "arith.constant"() <{value = 2 : index}> : () -> index | |
%569 = "arith.constant"() <{value = 1 : index}> : () -> index | |
"hal.return"(%567, %568, %569) : (index, index, index) -> () | |
}) {layout = #hal.pipeline.layout<bindings = [#hal.pipeline.binding<storage_buffer, "ReadOnly|Indirect">, #hal.pipeline.binding<storage_buffer, Indirect>], flags = Indirect>, ordinal = 0 : index, sym_name = "main_dispatch_2_batch_mmt4d_1x2x256x4096x16x16x1_f8E4M3FNUZxf8E4M3FNUZxf32", workgroup_size = [1 : index, 1 : index, 1 : index]} : () -> () | |
"builtin.module"() ({ | |
"llvm.func"() <{CConv = #llvm.cconv<ccc>, arg_attrs = [{llvm.align = 16 : i64, llvm.noalias, llvm.nonnull, llvm.noundef}, {llvm.align = 16 : i64, llvm.noalias, llvm.nonnull, llvm.noundef}, {llvm.align = 16 : i64, llvm.noalias, llvm.nonnull, llvm.noundef}], function_type = !llvm.func<i32 (ptr, ptr, ptr)>, linkage = #llvm.linkage<external>, sym_name = "main_dispatch_2_batch_mmt4d_1x2x256x4096x16x16x1_f8E4M3FNUZxf8E4M3FNUZxf32", visibility_ = 0 : i64}> ({ | |
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr): | |
%0 = "llvm.mlir.constant"() <{value = 15 : i64}> : () -> i64 | |
%1 = "llvm.mlir.constant"() <{value = 14 : i64}> : () -> i64 | |
%2 = "llvm.mlir.constant"() <{value = 13 : i64}> : () -> i64 | |
%3 = "llvm.mlir.constant"() <{value = 12 : i64}> : () -> i64 | |
%4 = "llvm.mlir.constant"() <{value = 11 : i64}> : () -> i64 | |
%5 = "llvm.mlir.constant"() <{value = 10 : i64}> : () -> i64 | |
%6 = "llvm.mlir.constant"() <{value = 9 : i64}> : () -> i64 | |
%7 = "llvm.mlir.constant"() <{value = 8 : i64}> : () -> i64 | |
%8 = "llvm.mlir.constant"() <{value = 7 : i64}> : () -> i64 | |
%9 = "llvm.mlir.constant"() <{value = 6 : i64}> : () -> i64 | |
%10 = "llvm.mlir.constant"() <{value = 5 : i64}> : () -> i64 | |
%11 = "llvm.mlir.constant"() <{value = 4 : i64}> : () -> i64 | |
%12 = "llvm.mlir.constant"() <{value = 3 : i64}> : () -> i64 | |
%13 = "llvm.mlir.constant"() <{value = 2 : i64}> : () -> i64 | |
%14 = "llvm.mlir.constant"() <{value = 1 : i64}> : () -> i64 | |
%15 = "llvm.mlir.constant"() <{value = 0 : i32}> : () -> i32 | |
%16 = "llvm.mlir.undef"() : () -> vector<16xf32> | |
%17 = "llvm.mlir.constant"() <{value = 0 : i64}> : () -> i64 | |
%18 = "llvm.mlir.constant"() <{value = 256 : index}> : () -> i64 | |
%19 = "llvm.mlir.constant"() <{value = 16777216 : index}> : () -> i64 | |
%20 = "llvm.mlir.constant"() <{value = 64 : index}> : () -> i64 | |
%21 = "llvm.mlir.constant"() <{value = true}> : () -> i1 | |
%22 = "llvm.mlir.constant"() <{value = 65536 : index}> : () -> i64 | |
%23 = "llvm.mlir.constant"() <{value = 15 : index}> : () -> i64 | |
%24 = "llvm.mlir.constant"() <{value = 14 : index}> : () -> i64 | |
%25 = "llvm.mlir.constant"() <{value = 13 : index}> : () -> i64 | |
%26 = "llvm.mlir.constant"() <{value = 12 : index}> : () -> i64 | |
%27 = "llvm.mlir.constant"() <{value = 11 : index}> : () -> i64 | |
%28 = "llvm.mlir.constant"() <{value = 10 : index}> : () -> i64 | |
%29 = "llvm.mlir.constant"() <{value = 9 : index}> : () -> i64 | |
%30 = "llvm.mlir.constant"() <{value = 8 : index}> : () -> i64 | |
%31 = "llvm.mlir.constant"() <{value = 7 : index}> : () -> i64 | |
%32 = "llvm.mlir.constant"() <{value = 6 : index}> : () -> i64 | |
%33 = "llvm.mlir.constant"() <{value = 5 : index}> : () -> i64 | |
%34 = "llvm.mlir.constant"() <{value = 4 : index}> : () -> i64 | |
%35 = "llvm.mlir.constant"() <{value = 3 : index}> : () -> i64 | |
%36 = "llvm.mlir.constant"() <{value = 2 : index}> : () -> i64 | |
%37 = "llvm.mlir.constant"() <{value = 16 : index}> : () -> i64 | |
%38 = "llvm.mlir.constant"() <{value = dense<0.000000e+00> : vector<16x16xf32>}> : () -> !llvm.array<16 x vector<16xf32>> | |
%39 = "llvm.mlir.constant"() <{value = 1 : index}> : () -> i64 | |
%40 = "llvm.mlir.constant"() <{value = 4096 : index}> : () -> i64 | |
%41 = "llvm.mlir.constant"() <{value = 0.000000e+00 : f32}> : () -> f32 | |
%42 = "llvm.mlir.constant"() <{value = 0 : index}> : () -> i64 | |
%43 = "llvm.mlir.constant"() <{value = 131072 : index}> : () -> i64 | |
%44 = "llvm.load"(%arg1) <{ordering = 0 : i64}> : (!llvm.ptr) -> !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)> | |
%45 = "llvm.extractvalue"(%44) <{position = array<i64: 10>}> : (!llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>) -> !llvm.ptr | |
%46 = "llvm.load"(%45) <{ordering = 0 : i64}> : (!llvm.ptr) -> !llvm.ptr | |
"llvm.intr.assume"(%21, %46, %20) <{op_bundle_sizes = array<i32: 2>, op_bundle_tags = ["align"]}> : (i1, !llvm.ptr, i64) -> () | |
%47 = "llvm.load"(%arg1) <{ordering = 0 : i64}> : (!llvm.ptr) -> !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)> | |
%48 = "llvm.extractvalue"(%47) <{position = array<i64: 10>}> : (!llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>) -> !llvm.ptr | |
%49 = "llvm.load"(%48) <{ordering = 0 : i64}> : (!llvm.ptr) -> !llvm.ptr | |
%50 = "llvm.getelementptr"(%49) <{elem_type = i8, rawConstantIndices = array<i32: 131072>}> : (!llvm.ptr) -> !llvm.ptr | |
"llvm.intr.assume"(%21, %50, %20) <{op_bundle_sizes = array<i32: 2>, op_bundle_tags = ["align"]}> : (i1, !llvm.ptr, i64) -> () | |
%51 = "llvm.load"(%arg1) <{ordering = 0 : i64}> : (!llvm.ptr) -> !llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)> | |
%52 = "llvm.extractvalue"(%51) <{position = array<i64: 10>}> : (!llvm.struct<"iree_hal_executable_dispatch_state_v0_t", (i32, i32, i16, i16, i32, i32, i16, i8, i8, ptr, ptr, ptr)>) -> !llvm.ptr | |
%53 = "llvm.getelementptr"(%52) <{elem_type = !llvm.ptr, rawConstantIndices = array<i32: 1>}> : (!llvm.ptr) -> !llvm.ptr | |
%54 = "llvm.load"(%53) <{ordering = 0 : i64}> : (!llvm.ptr) -> !llvm.ptr | |
%55 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
"llvm.intr.assume"(%21, %55, %20) <{op_bundle_sizes = array<i32: 2>, op_bundle_tags = ["align"]}> : (i1, !llvm.ptr, i64) -> () | |
%56 = "llvm.load"(%arg2) <{ordering = 0 : i64}> : (!llvm.ptr) -> !llvm.struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr, i32)> | |
%57 = "llvm.extractvalue"(%56) <{position = array<i64: 0>}> : (!llvm.struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr, i32)>) -> i32 | |
%58 = "llvm.zext"(%57) : (i32) -> i64 | |
%59 = "llvm.load"(%arg2) <{ordering = 0 : i64}> : (!llvm.ptr) -> !llvm.struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr, i32)> | |
%60 = "llvm.extractvalue"(%59) <{position = array<i64: 1>}> : (!llvm.struct<"iree_hal_executable_workgroup_state_v0_t", (i32, i32, i16, i16, i32, ptr, i32)>) -> i32 | |
%61 = "llvm.zext"(%60) : (i32) -> i64 | |
"llvm.br"(%42)[^bb1] : (i64) -> () | |
^bb1(%62: i64): // 2 preds: ^bb0, ^bb4 | |
%63 = "llvm.icmp"(%62, %37) <{predicate = 2 : i64}> : (i64, i64) -> i1 | |
"llvm.cond_br"(%63, %42)[^bb2, ^bb5] <{operandSegmentSizes = array<i32: 1, 1, 0>}> : (i1, i64) -> () | |
^bb2(%64: i64): // 2 preds: ^bb1, ^bb3 | |
%65 = "llvm.icmp"(%64, %37) <{predicate = 2 : i64}> : (i64, i64) -> i1 | |
"llvm.cond_br"(%65)[^bb3, ^bb4] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i1) -> () | |
^bb3: // pred: ^bb2 | |
%66 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%67 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%68 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%69 = "llvm.add"(%67, %68) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%70 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%71 = "llvm.add"(%69, %70) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%72 = "llvm.mul"(%62, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%73 = "llvm.add"(%71, %72) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%74 = "llvm.add"(%73, %64) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%75 = "llvm.getelementptr"(%66, %74) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%41, %75) <{ordering = 0 : i64}> : (f32, !llvm.ptr) -> () | |
%76 = "llvm.add"(%64, %39) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
"llvm.br"(%76)[^bb2] : (i64) -> () | |
^bb4: // pred: ^bb2 | |
%77 = "llvm.add"(%62, %39) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
"llvm.br"(%77)[^bb1] : (i64) -> () | |
^bb5: // pred: ^bb1 | |
%78 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%79 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%80 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%81 = "llvm.add"(%79, %80) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%82 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%83 = "llvm.add"(%81, %82) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%84 = "llvm.mul"(%42, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%85 = "llvm.add"(%83, %84) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%86 = "llvm.add"(%85, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%87 = "llvm.getelementptr"(%78, %86) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%88 = "llvm.load"(%87) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%89 = "llvm.insertvalue"(%38, %88) <{position = array<i64: 0>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%90 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%91 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%92 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%93 = "llvm.add"(%91, %92) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%94 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%95 = "llvm.add"(%93, %94) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%96 = "llvm.mul"(%39, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%97 = "llvm.add"(%95, %96) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%98 = "llvm.add"(%97, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%99 = "llvm.getelementptr"(%90, %98) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%100 = "llvm.load"(%99) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%101 = "llvm.insertvalue"(%89, %100) <{position = array<i64: 1>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%102 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%103 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%104 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%105 = "llvm.add"(%103, %104) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%106 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%107 = "llvm.add"(%105, %106) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%108 = "llvm.mul"(%36, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%109 = "llvm.add"(%107, %108) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%110 = "llvm.add"(%109, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%111 = "llvm.getelementptr"(%102, %110) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%112 = "llvm.load"(%111) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%113 = "llvm.insertvalue"(%101, %112) <{position = array<i64: 2>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%114 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%115 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%116 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%117 = "llvm.add"(%115, %116) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%118 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%119 = "llvm.add"(%117, %118) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%120 = "llvm.mul"(%35, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%121 = "llvm.add"(%119, %120) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%122 = "llvm.add"(%121, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%123 = "llvm.getelementptr"(%114, %122) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%124 = "llvm.load"(%123) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%125 = "llvm.insertvalue"(%113, %124) <{position = array<i64: 3>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%126 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%127 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%128 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%129 = "llvm.add"(%127, %128) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%130 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%131 = "llvm.add"(%129, %130) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%132 = "llvm.mul"(%34, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%133 = "llvm.add"(%131, %132) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%134 = "llvm.add"(%133, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%135 = "llvm.getelementptr"(%126, %134) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%136 = "llvm.load"(%135) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%137 = "llvm.insertvalue"(%125, %136) <{position = array<i64: 4>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%138 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%139 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%140 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%141 = "llvm.add"(%139, %140) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%142 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%143 = "llvm.add"(%141, %142) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%144 = "llvm.mul"(%33, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%145 = "llvm.add"(%143, %144) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%146 = "llvm.add"(%145, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%147 = "llvm.getelementptr"(%138, %146) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%148 = "llvm.load"(%147) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%149 = "llvm.insertvalue"(%137, %148) <{position = array<i64: 5>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%150 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%151 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%152 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%153 = "llvm.add"(%151, %152) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%154 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%155 = "llvm.add"(%153, %154) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%156 = "llvm.mul"(%32, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%157 = "llvm.add"(%155, %156) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%158 = "llvm.add"(%157, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%159 = "llvm.getelementptr"(%150, %158) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%160 = "llvm.load"(%159) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%161 = "llvm.insertvalue"(%149, %160) <{position = array<i64: 6>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%162 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%163 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%164 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%165 = "llvm.add"(%163, %164) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%166 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%167 = "llvm.add"(%165, %166) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%168 = "llvm.mul"(%31, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%169 = "llvm.add"(%167, %168) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%170 = "llvm.add"(%169, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%171 = "llvm.getelementptr"(%162, %170) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%172 = "llvm.load"(%171) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%173 = "llvm.insertvalue"(%161, %172) <{position = array<i64: 7>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%174 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%175 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%176 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%177 = "llvm.add"(%175, %176) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%178 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%179 = "llvm.add"(%177, %178) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%180 = "llvm.mul"(%30, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%181 = "llvm.add"(%179, %180) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%182 = "llvm.add"(%181, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%183 = "llvm.getelementptr"(%174, %182) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%184 = "llvm.load"(%183) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%185 = "llvm.insertvalue"(%173, %184) <{position = array<i64: 8>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%186 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%187 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%188 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%189 = "llvm.add"(%187, %188) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%190 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%191 = "llvm.add"(%189, %190) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%192 = "llvm.mul"(%29, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%193 = "llvm.add"(%191, %192) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%194 = "llvm.add"(%193, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%195 = "llvm.getelementptr"(%186, %194) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%196 = "llvm.load"(%195) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%197 = "llvm.insertvalue"(%185, %196) <{position = array<i64: 9>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%198 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%199 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%200 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%201 = "llvm.add"(%199, %200) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%202 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%203 = "llvm.add"(%201, %202) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%204 = "llvm.mul"(%28, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%205 = "llvm.add"(%203, %204) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%206 = "llvm.add"(%205, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%207 = "llvm.getelementptr"(%198, %206) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%208 = "llvm.load"(%207) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%209 = "llvm.insertvalue"(%197, %208) <{position = array<i64: 10>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%210 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%211 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%212 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%213 = "llvm.add"(%211, %212) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%214 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%215 = "llvm.add"(%213, %214) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%216 = "llvm.mul"(%27, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%217 = "llvm.add"(%215, %216) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%218 = "llvm.add"(%217, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%219 = "llvm.getelementptr"(%210, %218) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%220 = "llvm.load"(%219) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%221 = "llvm.insertvalue"(%209, %220) <{position = array<i64: 11>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%222 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%223 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%224 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%225 = "llvm.add"(%223, %224) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%226 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%227 = "llvm.add"(%225, %226) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%228 = "llvm.mul"(%26, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%229 = "llvm.add"(%227, %228) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%230 = "llvm.add"(%229, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%231 = "llvm.getelementptr"(%222, %230) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%232 = "llvm.load"(%231) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%233 = "llvm.insertvalue"(%221, %232) <{position = array<i64: 12>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%234 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%235 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%236 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%237 = "llvm.add"(%235, %236) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%238 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%239 = "llvm.add"(%237, %238) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%240 = "llvm.mul"(%25, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%241 = "llvm.add"(%239, %240) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%242 = "llvm.add"(%241, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%243 = "llvm.getelementptr"(%234, %242) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%244 = "llvm.load"(%243) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%245 = "llvm.insertvalue"(%233, %244) <{position = array<i64: 13>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%246 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%247 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%248 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%249 = "llvm.add"(%247, %248) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%250 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%251 = "llvm.add"(%249, %250) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%252 = "llvm.mul"(%24, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%253 = "llvm.add"(%251, %252) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%254 = "llvm.add"(%253, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%255 = "llvm.getelementptr"(%246, %254) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%256 = "llvm.load"(%255) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%257 = "llvm.insertvalue"(%245, %256) <{position = array<i64: 14>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%258 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%259 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%260 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%261 = "llvm.add"(%259, %260) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%262 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%263 = "llvm.add"(%261, %262) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%264 = "llvm.mul"(%23, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%265 = "llvm.add"(%263, %264) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%266 = "llvm.add"(%265, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%267 = "llvm.getelementptr"(%258, %266) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%268 = "llvm.load"(%267) <{alignment = 4 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xf32> | |
%269 = "llvm.insertvalue"(%257, %268) <{position = array<i64: 15>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
"llvm.br"(%42, %269)[^bb6] : (i64, !llvm.array<16 x vector<16xf32>>) -> () | |
^bb6(%270: i64, %271: !llvm.array<16 x vector<16xf32>>): // 2 preds: ^bb5, ^bb7 | |
%272 = "llvm.icmp"(%270, %40) <{predicate = 2 : i64}> : (i64, i64) -> i1 | |
"llvm.cond_br"(%272)[^bb7, ^bb8] <{operandSegmentSizes = array<i32: 1, 0, 0>}> : (i1) -> () | |
^bb7: // pred: ^bb6 | |
%273 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%274 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%275 = "llvm.add"(%273, %274) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%276 = "llvm.mul"(%270, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%277 = "llvm.add"(%275, %276) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%278 = "llvm.add"(%277, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%279 = "llvm.add"(%278, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%280 = "llvm.getelementptr"(%46, %279) <{elem_type = i8, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%281 = "llvm.load"(%280) <{alignment = 1 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xi8> | |
%282 = "llvm.getelementptr"(%49) <{elem_type = i8, rawConstantIndices = array<i32: 131072>}> : (!llvm.ptr) -> !llvm.ptr | |
%283 = "llvm.mul"(%42, %19) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%284 = "llvm.mul"(%58, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%285 = "llvm.add"(%283, %284) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%286 = "llvm.mul"(%270, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%287 = "llvm.add"(%285, %286) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%288 = "llvm.add"(%287, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%289 = "llvm.add"(%288, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%290 = "llvm.getelementptr"(%282, %289) <{elem_type = i8, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
%291 = "llvm.load"(%290) <{alignment = 1 : i64, ordering = 0 : i64}> : (!llvm.ptr) -> vector<16xi8> | |
%292 = "llvm.fpext"(%281) : (vector<16xi8>) -> vector<16xf32> | |
%293 = "llvm.fpext"(%291) : (vector<16xi8>) -> vector<16xf32> | |
%294 = "llvm.extractelement"(%292, %17) : (vector<16xf32>, i64) -> f32 | |
%295 = "llvm.insertelement"(%16, %294, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%296 = "llvm.shufflevector"(%295, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%297 = "llvm.extractvalue"(%271) <{position = array<i64: 0>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%298 = "llvm.intr.fmuladd"(%296, %293, %297) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%299 = "llvm.insertvalue"(%38, %298) <{position = array<i64: 0>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%300 = "llvm.extractelement"(%292, %14) : (vector<16xf32>, i64) -> f32 | |
%301 = "llvm.insertelement"(%16, %300, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%302 = "llvm.shufflevector"(%301, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%303 = "llvm.extractvalue"(%271) <{position = array<i64: 1>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%304 = "llvm.intr.fmuladd"(%302, %293, %303) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%305 = "llvm.insertvalue"(%299, %304) <{position = array<i64: 1>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%306 = "llvm.extractelement"(%292, %13) : (vector<16xf32>, i64) -> f32 | |
%307 = "llvm.insertelement"(%16, %306, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%308 = "llvm.shufflevector"(%307, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%309 = "llvm.extractvalue"(%271) <{position = array<i64: 2>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%310 = "llvm.intr.fmuladd"(%308, %293, %309) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%311 = "llvm.insertvalue"(%305, %310) <{position = array<i64: 2>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%312 = "llvm.extractelement"(%292, %12) : (vector<16xf32>, i64) -> f32 | |
%313 = "llvm.insertelement"(%16, %312, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%314 = "llvm.shufflevector"(%313, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%315 = "llvm.extractvalue"(%271) <{position = array<i64: 3>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%316 = "llvm.intr.fmuladd"(%314, %293, %315) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%317 = "llvm.insertvalue"(%311, %316) <{position = array<i64: 3>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%318 = "llvm.extractelement"(%292, %11) : (vector<16xf32>, i64) -> f32 | |
%319 = "llvm.insertelement"(%16, %318, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%320 = "llvm.shufflevector"(%319, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%321 = "llvm.extractvalue"(%271) <{position = array<i64: 4>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%322 = "llvm.intr.fmuladd"(%320, %293, %321) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%323 = "llvm.insertvalue"(%317, %322) <{position = array<i64: 4>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%324 = "llvm.extractelement"(%292, %10) : (vector<16xf32>, i64) -> f32 | |
%325 = "llvm.insertelement"(%16, %324, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%326 = "llvm.shufflevector"(%325, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%327 = "llvm.extractvalue"(%271) <{position = array<i64: 5>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%328 = "llvm.intr.fmuladd"(%326, %293, %327) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%329 = "llvm.insertvalue"(%323, %328) <{position = array<i64: 5>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%330 = "llvm.extractelement"(%292, %9) : (vector<16xf32>, i64) -> f32 | |
%331 = "llvm.insertelement"(%16, %330, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%332 = "llvm.shufflevector"(%331, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%333 = "llvm.extractvalue"(%271) <{position = array<i64: 6>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%334 = "llvm.intr.fmuladd"(%332, %293, %333) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%335 = "llvm.insertvalue"(%329, %334) <{position = array<i64: 6>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%336 = "llvm.extractelement"(%292, %8) : (vector<16xf32>, i64) -> f32 | |
%337 = "llvm.insertelement"(%16, %336, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%338 = "llvm.shufflevector"(%337, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%339 = "llvm.extractvalue"(%271) <{position = array<i64: 7>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%340 = "llvm.intr.fmuladd"(%338, %293, %339) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%341 = "llvm.insertvalue"(%335, %340) <{position = array<i64: 7>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%342 = "llvm.extractelement"(%292, %7) : (vector<16xf32>, i64) -> f32 | |
%343 = "llvm.insertelement"(%16, %342, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%344 = "llvm.shufflevector"(%343, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%345 = "llvm.extractvalue"(%271) <{position = array<i64: 8>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%346 = "llvm.intr.fmuladd"(%344, %293, %345) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%347 = "llvm.insertvalue"(%341, %346) <{position = array<i64: 8>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%348 = "llvm.extractelement"(%292, %6) : (vector<16xf32>, i64) -> f32 | |
%349 = "llvm.insertelement"(%16, %348, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%350 = "llvm.shufflevector"(%349, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%351 = "llvm.extractvalue"(%271) <{position = array<i64: 9>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%352 = "llvm.intr.fmuladd"(%350, %293, %351) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%353 = "llvm.insertvalue"(%347, %352) <{position = array<i64: 9>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%354 = "llvm.extractelement"(%292, %5) : (vector<16xf32>, i64) -> f32 | |
%355 = "llvm.insertelement"(%16, %354, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%356 = "llvm.shufflevector"(%355, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%357 = "llvm.extractvalue"(%271) <{position = array<i64: 10>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%358 = "llvm.intr.fmuladd"(%356, %293, %357) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%359 = "llvm.insertvalue"(%353, %358) <{position = array<i64: 10>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%360 = "llvm.extractelement"(%292, %4) : (vector<16xf32>, i64) -> f32 | |
%361 = "llvm.insertelement"(%16, %360, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%362 = "llvm.shufflevector"(%361, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%363 = "llvm.extractvalue"(%271) <{position = array<i64: 11>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%364 = "llvm.intr.fmuladd"(%362, %293, %363) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%365 = "llvm.insertvalue"(%359, %364) <{position = array<i64: 11>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%366 = "llvm.extractelement"(%292, %3) : (vector<16xf32>, i64) -> f32 | |
%367 = "llvm.insertelement"(%16, %366, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%368 = "llvm.shufflevector"(%367, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%369 = "llvm.extractvalue"(%271) <{position = array<i64: 12>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%370 = "llvm.intr.fmuladd"(%368, %293, %369) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%371 = "llvm.insertvalue"(%365, %370) <{position = array<i64: 12>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%372 = "llvm.extractelement"(%292, %2) : (vector<16xf32>, i64) -> f32 | |
%373 = "llvm.insertelement"(%16, %372, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%374 = "llvm.shufflevector"(%373, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%375 = "llvm.extractvalue"(%271) <{position = array<i64: 13>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%376 = "llvm.intr.fmuladd"(%374, %293, %375) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%377 = "llvm.insertvalue"(%371, %376) <{position = array<i64: 13>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%378 = "llvm.extractelement"(%292, %1) : (vector<16xf32>, i64) -> f32 | |
%379 = "llvm.insertelement"(%16, %378, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%380 = "llvm.shufflevector"(%379, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%381 = "llvm.extractvalue"(%271) <{position = array<i64: 14>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%382 = "llvm.intr.fmuladd"(%380, %293, %381) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%383 = "llvm.insertvalue"(%377, %382) <{position = array<i64: 14>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%384 = "llvm.extractelement"(%292, %0) : (vector<16xf32>, i64) -> f32 | |
%385 = "llvm.insertelement"(%16, %384, %15) : (vector<16xf32>, f32, i32) -> vector<16xf32> | |
%386 = "llvm.shufflevector"(%385, %16) <{mask = array<i32: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>}> : (vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%387 = "llvm.extractvalue"(%271) <{position = array<i64: 15>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%388 = "llvm.intr.fmuladd"(%386, %293, %387) <{fastmathFlags = #llvm.fastmath<none>}> : (vector<16xf32>, vector<16xf32>, vector<16xf32>) -> vector<16xf32> | |
%389 = "llvm.insertvalue"(%383, %388) <{position = array<i64: 15>}> : (!llvm.array<16 x vector<16xf32>>, vector<16xf32>) -> !llvm.array<16 x vector<16xf32>> | |
%390 = "llvm.add"(%270, %39) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
"llvm.br"(%390, %389)[^bb6] : (i64, !llvm.array<16 x vector<16xf32>>) -> () | |
^bb8: // pred: ^bb6 | |
%391 = "llvm.extractvalue"(%271) <{position = array<i64: 0>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%392 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%393 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%394 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%395 = "llvm.add"(%393, %394) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%396 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%397 = "llvm.add"(%395, %396) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%398 = "llvm.mul"(%42, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%399 = "llvm.add"(%397, %398) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%400 = "llvm.add"(%399, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%401 = "llvm.getelementptr"(%392, %400) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%391, %401) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%402 = "llvm.extractvalue"(%271) <{position = array<i64: 1>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%403 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%404 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%405 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%406 = "llvm.add"(%404, %405) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%407 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%408 = "llvm.add"(%406, %407) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%409 = "llvm.mul"(%39, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%410 = "llvm.add"(%408, %409) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%411 = "llvm.add"(%410, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%412 = "llvm.getelementptr"(%403, %411) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%402, %412) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%413 = "llvm.extractvalue"(%271) <{position = array<i64: 2>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%414 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%415 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%416 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%417 = "llvm.add"(%415, %416) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%418 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%419 = "llvm.add"(%417, %418) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%420 = "llvm.mul"(%36, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%421 = "llvm.add"(%419, %420) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%422 = "llvm.add"(%421, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%423 = "llvm.getelementptr"(%414, %422) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%413, %423) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%424 = "llvm.extractvalue"(%271) <{position = array<i64: 3>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%425 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%426 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%427 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%428 = "llvm.add"(%426, %427) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%429 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%430 = "llvm.add"(%428, %429) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%431 = "llvm.mul"(%35, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%432 = "llvm.add"(%430, %431) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%433 = "llvm.add"(%432, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%434 = "llvm.getelementptr"(%425, %433) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%424, %434) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%435 = "llvm.extractvalue"(%271) <{position = array<i64: 4>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%436 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%437 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%438 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%439 = "llvm.add"(%437, %438) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%440 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%441 = "llvm.add"(%439, %440) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%442 = "llvm.mul"(%34, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%443 = "llvm.add"(%441, %442) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%444 = "llvm.add"(%443, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%445 = "llvm.getelementptr"(%436, %444) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%435, %445) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%446 = "llvm.extractvalue"(%271) <{position = array<i64: 5>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%447 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%448 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%449 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%450 = "llvm.add"(%448, %449) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%451 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%452 = "llvm.add"(%450, %451) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%453 = "llvm.mul"(%33, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%454 = "llvm.add"(%452, %453) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%455 = "llvm.add"(%454, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%456 = "llvm.getelementptr"(%447, %455) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%446, %456) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%457 = "llvm.extractvalue"(%271) <{position = array<i64: 6>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%458 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%459 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%460 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%461 = "llvm.add"(%459, %460) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%462 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%463 = "llvm.add"(%461, %462) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%464 = "llvm.mul"(%32, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%465 = "llvm.add"(%463, %464) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%466 = "llvm.add"(%465, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%467 = "llvm.getelementptr"(%458, %466) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%457, %467) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%468 = "llvm.extractvalue"(%271) <{position = array<i64: 7>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%469 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%470 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%471 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%472 = "llvm.add"(%470, %471) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%473 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%474 = "llvm.add"(%472, %473) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%475 = "llvm.mul"(%31, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%476 = "llvm.add"(%474, %475) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%477 = "llvm.add"(%476, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%478 = "llvm.getelementptr"(%469, %477) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%468, %478) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%479 = "llvm.extractvalue"(%271) <{position = array<i64: 8>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%480 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%481 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%482 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%483 = "llvm.add"(%481, %482) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%484 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%485 = "llvm.add"(%483, %484) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%486 = "llvm.mul"(%30, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%487 = "llvm.add"(%485, %486) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%488 = "llvm.add"(%487, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%489 = "llvm.getelementptr"(%480, %488) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%479, %489) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%490 = "llvm.extractvalue"(%271) <{position = array<i64: 9>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%491 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%492 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%493 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%494 = "llvm.add"(%492, %493) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%495 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%496 = "llvm.add"(%494, %495) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%497 = "llvm.mul"(%29, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%498 = "llvm.add"(%496, %497) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%499 = "llvm.add"(%498, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%500 = "llvm.getelementptr"(%491, %499) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%490, %500) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%501 = "llvm.extractvalue"(%271) <{position = array<i64: 10>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%502 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%503 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%504 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%505 = "llvm.add"(%503, %504) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%506 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%507 = "llvm.add"(%505, %506) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%508 = "llvm.mul"(%28, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%509 = "llvm.add"(%507, %508) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%510 = "llvm.add"(%509, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%511 = "llvm.getelementptr"(%502, %510) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%501, %511) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%512 = "llvm.extractvalue"(%271) <{position = array<i64: 11>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%513 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%514 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%515 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%516 = "llvm.add"(%514, %515) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%517 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%518 = "llvm.add"(%516, %517) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%519 = "llvm.mul"(%27, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%520 = "llvm.add"(%518, %519) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%521 = "llvm.add"(%520, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%522 = "llvm.getelementptr"(%513, %521) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%512, %522) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%523 = "llvm.extractvalue"(%271) <{position = array<i64: 12>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%524 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%525 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%526 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%527 = "llvm.add"(%525, %526) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%528 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%529 = "llvm.add"(%527, %528) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%530 = "llvm.mul"(%26, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%531 = "llvm.add"(%529, %530) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%532 = "llvm.add"(%531, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%533 = "llvm.getelementptr"(%524, %532) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%523, %533) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%534 = "llvm.extractvalue"(%271) <{position = array<i64: 13>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%535 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%536 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%537 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%538 = "llvm.add"(%536, %537) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%539 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%540 = "llvm.add"(%538, %539) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%541 = "llvm.mul"(%25, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%542 = "llvm.add"(%540, %541) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%543 = "llvm.add"(%542, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%544 = "llvm.getelementptr"(%535, %543) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%534, %544) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%545 = "llvm.extractvalue"(%271) <{position = array<i64: 14>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%546 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%547 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%548 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%549 = "llvm.add"(%547, %548) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%550 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%551 = "llvm.add"(%549, %550) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%552 = "llvm.mul"(%24, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%553 = "llvm.add"(%551, %552) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%554 = "llvm.add"(%553, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%555 = "llvm.getelementptr"(%546, %554) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%545, %555) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
%556 = "llvm.extractvalue"(%271) <{position = array<i64: 15>}> : (!llvm.array<16 x vector<16xf32>>) -> vector<16xf32> | |
%557 = "llvm.getelementptr"(%54) <{elem_type = f32, rawConstantIndices = array<i32: 4227072>}> : (!llvm.ptr) -> !llvm.ptr | |
%558 = "llvm.mul"(%42, %43) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%559 = "llvm.mul"(%61, %22) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%560 = "llvm.add"(%558, %559) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%561 = "llvm.mul"(%58, %18) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%562 = "llvm.add"(%560, %561) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%563 = "llvm.mul"(%23, %37) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%564 = "llvm.add"(%562, %563) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%565 = "llvm.add"(%564, %42) <{overflowFlags = #llvm.overflow<none>}> : (i64, i64) -> i64 | |
%566 = "llvm.getelementptr"(%557, %565) <{elem_type = f32, rawConstantIndices = array<i32: -2147483648>}> : (!llvm.ptr, i64) -> !llvm.ptr | |
"llvm.store"(%556, %566) <{alignment = 4 : i64, ordering = 0 : i64}> : (vector<16xf32>, !llvm.ptr) -> () | |
"llvm.return"(%15) : (i32) -> () | |
}) : () -> () | |
}) {llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-unknown-eabi-elf"} : () -> () | |
"hal.executable.variant_end"() : () -> () | |
}) {sym_name = "embedded_elf_x86_64", target = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,-amx-fp8,+xsaves,-avx512fp16,-usermsr,-sm4,-egpr,+sse4.1,+avx512ifma,+xsave,+sse4.2,-tsxldtrk,-sm3,-ptwrite,-widekl,-movrs,+invpcid,+64bit,+xsavec,-avx10.1-512,+avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-ccmp,-amx-int8,-kl,-avx10.1-256,+evex512,-avxvnni,-rtm,+adx,+avx2,-hreset,-movdiri,-serialize,-sha512,+vpclmulqdq,+avx512vl,-uintr,-cf,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,-avx10.2-256,+gfni,-avxvnniint16,-amx-fp16,-zu,-ndd,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,+avx512bf16,+avx512vnni,-push2pop2,+cx8,+avx512bw,+sse3,+pku,-nf,-amx-tf32,-amx-avx512,+fsgsbase,+clzero,+mwaitx,-lwp,+lzcnt,+sha,-movdir64b,-ppx,+wbnoinvd,-enqcmd,-amx-transpose,-avx10.2-512,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,+avx512bitalg,+rdpru,+clwb,+mmx,+sse2,+rdseed,+avx512vbmi2,-prefetchi,-amx-movrs,+rdpid,-fma4,+avx512vbmi,+shstk,+vaes,-waitpkg,-sgx,+fxsr,+avx512dq,+sse4a", data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", native_vector_size = 64 : i64, target_triple = "x86_64-unknown-unknown-eabi-elf"}>} : () -> () | |
%2 = linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : tensor<1x32x4096xf8E4M3FNUZ>, tensor<1x4096x4096xf8E4M3FNUZ>) outs(%1 : tensor<1x32x4096xf32>) -> tensor<1x32x4096xf32> | |
^ | |
Traceback (most recent call last): | |
File "<frozen runpy>", line 198, in _run_module_as_main | |
File "<frozen runpy>", line 88, in _run_code | |
File "/home/chi/src/shark-ai/sharktank/sharktank/examples/paged_llm_v1.py", line 342, in <module> | |
main() | |
File "/home/chi/src/shark-ai/sharktank/sharktank/examples/paged_llm_v1.py", line 321, in main | |
batch.prefill() | |
File "/home/chi/src/shark-ai/sharktank/sharktank/examples/paged_llm_v1.py", line 173, in prefill | |
logits = model.prefill( | |
^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/sharktank/sharktank/models/llama/llama.py", line 150, in prefill | |
h = block( | |
^^^^^^ | |
File "/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl | |
return self._call_impl(*args, **kwargs) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl | |
return forward_call(*args, **kwargs) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/sharktank/sharktank/models/llama/llama.py", line 286, in forward | |
h = self.attn( | |
^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl | |
return self._call_impl(*args, **kwargs) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl | |
return forward_call(*args, **kwargs) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/sharktank/sharktank/layers/paged_llama_attention_block.py", line 107, in forward | |
xq = self.attn_q(x) | |
^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl | |
return self._call_impl(*args, **kwargs) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl | |
return forward_call(*args, **kwargs) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/sharktank/sharktank/layers/linear.py", line 80, in forward | |
y = ops.linear(x, weight, bias) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/sharktank/sharktank/ops/_registry.py", line 199, in __call__ | |
selected_override, *results = trampoline(self, *args, **kwargs) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/sharktank/sharktank/ops/signatures.py", line 663, in _linear_trampoline | |
result = override(input, weight, bias, accum_dtype=accum_dtype) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/sharktank/sharktank/ops/qlinear_impls.py", line 95, in qlinear_tensor_scaled | |
y_qs = _invoke_mmt_kernel(x_qs, weight_qs, accum_dtype=accum_dtype) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/sharktank/sharktank/ops/qlinear_impls.py", line 206, in _invoke_mmt_kernel | |
y_qs = kernels.batch_matmul_transpose_b(lhs, rhs, accum_dtype=accum_dtype) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/sharktank/sharktank/kernels/batch_matmul_transpose_b.py", line 33, in batch_matmul_transpose_b | |
return _batch_matmul_transpose_b( | |
^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/torch/_ops.py", line 1116, in __call__ | |
return self._op(*args, **(kwargs or {})) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/iree/turbine/runtime/op_reg/base.py", line 928, in handler | |
return eager_dispatch(ksel) | |
^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/iree/turbine/runtime/op_reg/eager.py", line 98, in eager_dispatch | |
vm_context, vm_f, config = compile_standalone_kernel(device, ksel) | |
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
File "/home/chi/src/shark-ai/.venv/lib/python3.11/site-packages/iree/turbine/runtime/op_reg/compiler.py", line 126, in compile_standalone_kernel | |
raise GeneralError(f"Kernel compilation failed. See diagnostics.") | |
iree.turbine.support.exceptions.GeneralError: Kernel compilation failed. See diagnostics. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
With Boian's new commit for eager mode: https://github.com/nod-ai/shark-ai/pull/896/files/9f1c3d40f2aa7a40abd4ab6f562648f7b35e53dc..aa5c7b099e0396607127d80622e1fab243a3b694