// Gist 101f66d2dd0cfb9b54ec1e770d8bbadb by @AmosLewis, created February 19, 2025 20:40.
// Note: this file has been truncated; the full MLIR listing is available in the gist.
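// The affine maps below parametrize shapes on a single symbol s0. Reading the shapes
// that follow (this interpretation is inferred from the IR, not stated in the dump):
// s0 appears to count 32-token pages, so s0 * 32 is the dynamic sequence length;
// 4 looks like the batch size, 4096 the model dimension, 14336 the FFN dimension,
// 128256 the vocabulary size, with 32 query heads and 8 KV heads of head dim 128,
// i.e. a Llama-3-8B-like configuration.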
#map = affine_map<()[s0] -> (4, s0 * 32)>
#map1 = affine_map<()[s0] -> (4, s0)>
#map2 = affine_map<()[s0] -> (s0, 2097152)>
#map3 = affine_map<()[s0] -> (s0 * 32)>
#map4 = affine_map<()[s0] -> (1, 1, s0 * 32, 131072)>
#map5 = affine_map<()[s0] -> (1, 1, s0 * 32, s0 * 32)>
#map6 = affine_map<()[s0] -> (4, 1, s0 * 32)>
#map7 = affine_map<()[s0] -> (4, 1, 1, s0 * 32)>
#map8 = affine_map<()[s0] -> (4, 1, s0 * 32, s0 * 32)>
#map9 = affine_map<()[s0] -> (4, s0 * 32, 4096)>
#map10 = affine_map<()[s0] -> (4, s0 * 32, 1)>
#map11 = affine_map<()[s0] -> (4, s0 * 32, 1024)>
#map12 = affine_map<()[s0] -> (4, s0 * 32, 32, 128)>
#map13 = affine_map<()[s0] -> (4, s0 * 32, 8, 128)>
#map14 = affine_map<()[s0] -> (s0 * 32, 128)>
#map15 = affine_map<()[s0] -> (1, s0 * 32, 128)>
#map16 = affine_map<()[s0] -> (1, s0 * 32, 1, 128)>
#map17 = affine_map<()[s0] -> (4, s0 * 32, 1, 128)>
#map18 = affine_map<()[s0] -> (4, s0 * 32, 32, 64)>
#map19 = affine_map<()[s0] -> (4, s0 * 32, 8, 64)>
#map20 = affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)>
#map21 = affine_map<()[s0] -> (s0 * 64, 32, 8, 128)>
#map22 = affine_map<()[s0] -> (4, s0, 32, 8, 128)>
#map23 = affine_map<()[s0] -> (s0 * 4, 32, 8, 128)>
#map24 = affine_map<()[s0] -> (s0 * 4)>
#map25 = affine_map<()[s0] -> (4, s0 * 32, 8, 1, 128)>
#map26 = affine_map<()[s0] -> (4, s0 * 32, 8, 4, 128)>
#map27 = affine_map<()[s0] -> (4, 32, s0 * 32, 128)>
#map28 = affine_map<()[s0] -> (4, s0 * 32, 14336)>
#map29 = affine_map<()[s0] -> (s0 * 128, 4096)>
#map30 = affine_map<()[s0] -> (s0 * 128, 128256)>
#map31 = affine_map<()[s0] -> (4, s0 * 32, 128256)>
#map32 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map33 = affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>
#map34 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)>
#map35 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d3)>
#map36 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d4, d3)>
#map37 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d4, d2)>
#map38 = affine_map<(d0, d1, d2, d3, d4) -> ()>
#map39 = affine_map<(d0, d1, d2, d3, d4) -> (d1, d4)>
#map40 = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2)>
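// The module below declares one private util.global per model parameter. Each global's
// initial_value is a #stream.parameter.named reference into the parameter archive
// scoped "model", so weights are resolved from the archive at load time rather than
// embedded in the IR. Quantized weights carry a ":qs" suffix and hold fp8
// (f8E4M3FNUZ) values; they are paired with scalar f32 ":rscale" globals
// (presumably reciprocal scales) for the quantized inputs and outputs of each op.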
"builtin.module"() <{sym_name = "module"}> ({
"util.global"() <{initial_value = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xbf16>, sym_name = "__auto.token_embd.weight", sym_visibility = "private", type = tensor<128256x4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.0.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.0.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.0.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.0.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.0.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.0.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.0.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.0.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.0.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.0.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.0.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.0.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.0.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.0.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.0.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.0.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.0.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.0.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.0.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.0.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.0.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.1.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.1.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.1.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.1.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.1.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.1.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.1.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.1.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.1.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.1.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.1.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.1.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.1.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.1.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.1.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.1.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.1.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.1.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.1.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.1.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.1.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.2.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.2.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.2.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.2.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.2.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.2.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.2.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.2.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.2.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.2.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.2.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.2.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.2.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.2.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.2.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.2.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.2.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.2.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.2.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.2.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.2.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.3.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.3.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.3.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.3.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.3.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.3.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.3.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.3.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.3.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.3.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.3.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.3.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.3.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.3.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.3.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.3.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.3.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.3.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.3.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.3.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.3.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.4.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.4.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.4.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.4.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.4.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.4.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.4.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.4.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.4.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.4.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.4.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.4.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.4.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.4.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.4.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.4.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.4.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.4.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.4.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.4.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.4.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.5.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.5.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.5.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.5.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.5.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.5.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.5.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.5.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.5.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.5.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.5.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.5.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.5.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.5.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.5.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.5.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.5.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.5.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.5.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.5.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.5.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.6.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.6.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.6.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.6.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.6.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.6.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.6.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.6.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.6.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.6.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.6.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.6.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.6.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.6.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.6.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.6.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.6.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.6.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.6.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.6.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.6.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.7.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.7.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.7.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.7.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.7.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.7.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.7.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.7.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.7.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.7.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.7.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.7.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.7.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.7.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.7.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.7.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.7.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.7.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.7.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.7.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.7.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.8.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.8.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.8.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.8.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.8.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.8.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.8.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.8.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.8.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.8.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.8.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.8.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.8.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.8.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.8.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.8.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.8.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.8.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.8.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.8.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.8.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.9.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.9.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.9.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.9.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.9.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.9.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.9.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.9.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.9.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.9.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.9.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.9.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.9.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.9.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.9.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.9.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.9.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.9.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.9.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.9.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.9.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.10.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.10.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.10.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.10.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.10.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.10.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.10.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.10.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.10.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.10.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.10.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.10.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.10.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.10.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.10.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.10.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.10.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.10.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.10.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.10.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.10.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.11.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.11.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.11.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.11.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.11.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.11.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.11.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.11.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.11.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.11.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.11.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.11.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.11.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.11.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.11.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.11.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.11.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.11.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.11.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.11.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.11.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.12.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.12.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.12.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.12.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.12.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.12.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.12.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.12.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.12.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.12.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.12.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.12.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.12.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.12.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.12.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.12.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.12.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.12.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.12.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.12.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.12.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.13.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.13.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.13.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.13.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.13.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.13.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.13.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.13.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.13.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.13.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.13.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.13.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.13.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.13.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.13.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.13.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.13.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.13.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.13.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.13.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.13.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.14.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.14.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.14.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.14.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.14.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.14.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.14.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.14.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.14.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.14.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.14.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.14.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.14.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.14.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.14.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.14.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.14.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.14.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.14.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.14.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.14.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.15.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.15.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.15.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.15.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.15.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.15.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.15.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.15.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.15.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.15.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.15.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.15.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.15.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.15.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.15.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.15.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.15.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.15.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.15.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.15.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.15.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.16.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.16.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.16.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.16.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.16.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.16.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.16.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.16.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.16.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.16.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.16.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.16.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.16.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.16.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.16.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.16.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.16.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.16.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.16.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.16.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.16.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.17.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.17.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.17.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.17.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.17.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.17.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.17.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.17.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.17.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.17.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.17.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.17.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.17.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.17.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.17.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.17.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.17.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.17.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.17.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.17.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.17.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.18.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.18.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.18.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.18.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.18.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.18.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.18.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.18.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.18.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.18.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.18.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.18.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.18.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.18.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.18.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.18.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.18.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.18.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.18.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.18.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.18.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.19.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.19.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.19.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.19.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.19.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.19.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.19.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.19.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.19.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.19.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.19.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.19.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.19.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.19.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.19.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.19.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.19.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.19.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.19.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.19.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.19.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.20.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.20.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.20.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.20.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.20.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.20.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.20.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.20.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.20.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.20.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.20.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.20.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.20.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.20.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.20.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.20.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.20.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.20.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.20.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.20.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.20.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.21.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.21.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.21.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.21.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.21.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.21.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.21.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.21.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.21.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.21.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.21.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.21.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.21.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.21.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.21.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.21.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.21.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.21.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.21.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.21.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.21.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.22.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.22.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.22.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.22.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.22.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.22.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.22.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.22.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.22.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.22.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.22.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.22.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.22.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.22.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.22.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.22.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.22.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.22.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.22.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.22.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.22.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.23.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.23.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.23.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.23.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.23.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.23.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.23.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.23.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.23.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.23.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.23.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.23.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.23.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.23.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.23.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.23.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.23.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.23.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.23.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.23.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.23.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.24.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.24.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.24.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.24.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.24.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.24.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.24.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.24.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.24.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.24.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.24.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.24.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.24.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.24.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.24.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.24.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.24.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.24.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.24.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.24.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.24.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.25.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.25.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.25.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.25.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.25.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.25.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.25.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.25.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.25.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.25.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.25.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.25.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.25.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.25.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.25.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.25.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.25.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.25.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.25.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.25.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.25.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.26.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.26.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.26.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.26.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.26.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.26.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.26.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.26.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.26.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.26.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.26.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.26.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.26.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.26.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.26.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.26.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.26.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.26.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.26.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.26.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.26.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.27.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.27.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.27.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.27.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.27.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.27.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.27.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.27.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.27.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.27.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.27.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.27.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.27.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.27.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.27.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.27.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.27.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.27.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.27.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.27.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.27.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.28.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.28.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.28.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.28.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.28.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.28.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.28.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.28.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.28.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.28.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.28.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.28.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.28.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.28.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.28.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.28.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.28.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.28.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.28.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.28.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.28.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.29.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.29.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.29.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.29.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.29.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.29.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.29.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.29.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.29.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.29.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.29.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.29.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.29.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.29.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.29.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.29.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.29.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.29.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.29.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.29.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.29.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.30.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.30.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.30.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.30.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.30.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.30.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.30.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.30.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.30.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.30.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.30.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.30.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.30.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.30.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.30.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.30.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.30.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.30.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.30.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.30.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.30.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.31.attn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_q.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.31.attn_q.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.31.attn_q.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_q.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.31.attn_q.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_k.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.31.attn_k.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.31.attn_k.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_k.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.31.attn_k.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_v.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.31.attn_v.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.31.attn_v.weight:qs", sym_visibility = "private", type = tensor<1024x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_v.q_output:rscale"> : tensor<f32>, sym_name = "__auto.blk.31.attn_v.q_output:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_scale"> : tensor<f32>, sym_name = "__auto.blk.31.attn_scale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_output.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.31.attn_output.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.31.attn_output.weight:qs", sym_visibility = "private", type = tensor<4096x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.blk.31.ffn_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.ffn_gate.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.31.ffn_gate.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.31.ffn_gate.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.ffn_up.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.31.ffn_up.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ>, sym_name = "__auto.blk.31.ffn_up.weight:qs", sym_visibility = "private", type = tensor<14336x4096xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.ffn_down.q_input:rscale"> : tensor<f32>, sym_name = "__auto.blk.31.ffn_down.q_input:rscale", sym_visibility = "private", type = tensor<f32>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"blk.31.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ>, sym_name = "__auto.blk.31.ffn_down.weight:qs", sym_visibility = "private", type = tensor<4096x14336xf8E4M3FNUZ>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xbf16>, sym_name = "__auto.output_norm.weight", sym_visibility = "private", type = tensor<4096xbf16>}> : () -> ()
"util.global"() <{initial_value = #stream.parameter.named<"model"::"output.weight"> : tensor<128256x4096xbf16>, sym_name = "__auto.output.weight", sym_visibility = "private", type = tensor<128256x4096xbf16>}> : () -> ()
"func.func"() <{arg_attrs = [{}, {}, {}, {}], function_type = (!torch.vtensor<[4,?],si64>, !torch.vtensor<[4],si64>, !torch.vtensor<[4,?],si64>, !torch.tensor<[?,2097152],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,128256],bf16>, sym_name = "prefill_bs4"}> ({
^bb0(%arg67: !torch.vtensor<[4,?],si64>, %arg68: !torch.vtensor<[4],si64>, %arg69: !torch.vtensor<[4,?],si64>, %arg70: !torch.tensor<[?,2097152],f8E4M3FNUZ>):
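// Preamble: load every parameter global (util.global.load) and wrap the builtin
// tensor as a !torch.vtensor (torch_c.from_builtin_tensor) so the torch-dialect
// ops in the function body can consume it. Scalar rescales become rank-0
// vtensors ([],f32).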
%17186 = "util.global.load"() <{global = @__auto.token_embd.weight}> : () -> tensor<128256x4096xbf16>
%17187 = "torch_c.from_builtin_tensor"(%17186) : (tensor<128256x4096xbf16>) -> !torch.vtensor<[128256,4096],bf16>
%17188 = "util.global.load"() <{global = @__auto.blk.0.attn_norm.weight}> : () -> tensor<4096xbf16>
%17189 = "torch_c.from_builtin_tensor"(%17188) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17190 = "util.global.load"() <{global = @"__auto.blk.0.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17191 = "torch_c.from_builtin_tensor"(%17190) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17192 = "util.global.load"() <{global = @"__auto.blk.0.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17193 = "torch_c.from_builtin_tensor"(%17192) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17194 = "util.global.load"() <{global = @"__auto.blk.0.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17195 = "torch_c.from_builtin_tensor"(%17194) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17196 = "util.global.load"() <{global = @"__auto.blk.0.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17197 = "torch_c.from_builtin_tensor"(%17196) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17198 = "util.global.load"() <{global = @"__auto.blk.0.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17199 = "torch_c.from_builtin_tensor"(%17198) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17200 = "util.global.load"() <{global = @"__auto.blk.0.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17201 = "torch_c.from_builtin_tensor"(%17200) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17202 = "util.global.load"() <{global = @"__auto.blk.0.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17203 = "torch_c.from_builtin_tensor"(%17202) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17204 = "util.global.load"() <{global = @"__auto.blk.0.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17205 = "torch_c.from_builtin_tensor"(%17204) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17206 = "util.global.load"() <{global = @"__auto.blk.0.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17207 = "torch_c.from_builtin_tensor"(%17206) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17208 = "util.global.load"() <{global = @__auto.blk.0.attn_scale}> : () -> tensor<f32>
%17209 = "torch_c.from_builtin_tensor"(%17208) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17210 = "util.global.load"() <{global = @"__auto.blk.0.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17211 = "torch_c.from_builtin_tensor"(%17210) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17212 = "util.global.load"() <{global = @"__auto.blk.0.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17213 = "torch_c.from_builtin_tensor"(%17212) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17214 = "util.global.load"() <{global = @__auto.blk.0.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17215 = "torch_c.from_builtin_tensor"(%17214) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17216 = "util.global.load"() <{global = @"__auto.blk.0.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17217 = "torch_c.from_builtin_tensor"(%17216) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17218 = "util.global.load"() <{global = @"__auto.blk.0.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17219 = "torch_c.from_builtin_tensor"(%17218) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17220 = "util.global.load"() <{global = @"__auto.blk.0.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17221 = "torch_c.from_builtin_tensor"(%17220) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17222 = "util.global.load"() <{global = @"__auto.blk.0.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17223 = "torch_c.from_builtin_tensor"(%17222) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17224 = "util.global.load"() <{global = @"__auto.blk.0.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17225 = "torch_c.from_builtin_tensor"(%17224) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17226 = "util.global.load"() <{global = @"__auto.blk.0.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17227 = "torch_c.from_builtin_tensor"(%17226) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17228 = "util.global.load"() <{global = @__auto.blk.1.attn_norm.weight}> : () -> tensor<4096xbf16>
%17229 = "torch_c.from_builtin_tensor"(%17228) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17230 = "util.global.load"() <{global = @"__auto.blk.1.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17231 = "torch_c.from_builtin_tensor"(%17230) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17232 = "util.global.load"() <{global = @"__auto.blk.1.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17233 = "torch_c.from_builtin_tensor"(%17232) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17234 = "util.global.load"() <{global = @"__auto.blk.1.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17235 = "torch_c.from_builtin_tensor"(%17234) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17236 = "util.global.load"() <{global = @"__auto.blk.1.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17237 = "torch_c.from_builtin_tensor"(%17236) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17238 = "util.global.load"() <{global = @"__auto.blk.1.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17239 = "torch_c.from_builtin_tensor"(%17238) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17240 = "util.global.load"() <{global = @"__auto.blk.1.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17241 = "torch_c.from_builtin_tensor"(%17240) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17242 = "util.global.load"() <{global = @"__auto.blk.1.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17243 = "torch_c.from_builtin_tensor"(%17242) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17244 = "util.global.load"() <{global = @"__auto.blk.1.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17245 = "torch_c.from_builtin_tensor"(%17244) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17246 = "util.global.load"() <{global = @"__auto.blk.1.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17247 = "torch_c.from_builtin_tensor"(%17246) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17248 = "util.global.load"() <{global = @__auto.blk.1.attn_scale}> : () -> tensor<f32>
%17249 = "torch_c.from_builtin_tensor"(%17248) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17250 = "util.global.load"() <{global = @"__auto.blk.1.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17251 = "torch_c.from_builtin_tensor"(%17250) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17252 = "util.global.load"() <{global = @"__auto.blk.1.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17253 = "torch_c.from_builtin_tensor"(%17252) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17254 = "util.global.load"() <{global = @__auto.blk.1.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17255 = "torch_c.from_builtin_tensor"(%17254) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17256 = "util.global.load"() <{global = @"__auto.blk.1.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17257 = "torch_c.from_builtin_tensor"(%17256) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17258 = "util.global.load"() <{global = @"__auto.blk.1.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17259 = "torch_c.from_builtin_tensor"(%17258) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17260 = "util.global.load"() <{global = @"__auto.blk.1.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17261 = "torch_c.from_builtin_tensor"(%17260) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17262 = "util.global.load"() <{global = @"__auto.blk.1.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17263 = "torch_c.from_builtin_tensor"(%17262) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17264 = "util.global.load"() <{global = @"__auto.blk.1.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17265 = "torch_c.from_builtin_tensor"(%17264) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17266 = "util.global.load"() <{global = @"__auto.blk.1.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17267 = "torch_c.from_builtin_tensor"(%17266) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17268 = "util.global.load"() <{global = @__auto.blk.2.attn_norm.weight}> : () -> tensor<4096xbf16>
%17269 = "torch_c.from_builtin_tensor"(%17268) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17270 = "util.global.load"() <{global = @"__auto.blk.2.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17271 = "torch_c.from_builtin_tensor"(%17270) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17272 = "util.global.load"() <{global = @"__auto.blk.2.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17273 = "torch_c.from_builtin_tensor"(%17272) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17274 = "util.global.load"() <{global = @"__auto.blk.2.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17275 = "torch_c.from_builtin_tensor"(%17274) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17276 = "util.global.load"() <{global = @"__auto.blk.2.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17277 = "torch_c.from_builtin_tensor"(%17276) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17278 = "util.global.load"() <{global = @"__auto.blk.2.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17279 = "torch_c.from_builtin_tensor"(%17278) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17280 = "util.global.load"() <{global = @"__auto.blk.2.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17281 = "torch_c.from_builtin_tensor"(%17280) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17282 = "util.global.load"() <{global = @"__auto.blk.2.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17283 = "torch_c.from_builtin_tensor"(%17282) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17284 = "util.global.load"() <{global = @"__auto.blk.2.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17285 = "torch_c.from_builtin_tensor"(%17284) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17286 = "util.global.load"() <{global = @"__auto.blk.2.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17287 = "torch_c.from_builtin_tensor"(%17286) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17288 = "util.global.load"() <{global = @__auto.blk.2.attn_scale}> : () -> tensor<f32>
%17289 = "torch_c.from_builtin_tensor"(%17288) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17290 = "util.global.load"() <{global = @"__auto.blk.2.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17291 = "torch_c.from_builtin_tensor"(%17290) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17292 = "util.global.load"() <{global = @"__auto.blk.2.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17293 = "torch_c.from_builtin_tensor"(%17292) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17294 = "util.global.load"() <{global = @__auto.blk.2.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17295 = "torch_c.from_builtin_tensor"(%17294) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17296 = "util.global.load"() <{global = @"__auto.blk.2.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17297 = "torch_c.from_builtin_tensor"(%17296) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17298 = "util.global.load"() <{global = @"__auto.blk.2.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17299 = "torch_c.from_builtin_tensor"(%17298) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17300 = "util.global.load"() <{global = @"__auto.blk.2.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17301 = "torch_c.from_builtin_tensor"(%17300) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17302 = "util.global.load"() <{global = @"__auto.blk.2.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17303 = "torch_c.from_builtin_tensor"(%17302) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17304 = "util.global.load"() <{global = @"__auto.blk.2.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17305 = "torch_c.from_builtin_tensor"(%17304) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17306 = "util.global.load"() <{global = @"__auto.blk.2.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17307 = "torch_c.from_builtin_tensor"(%17306) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17308 = "util.global.load"() <{global = @__auto.blk.3.attn_norm.weight}> : () -> tensor<4096xbf16>
%17309 = "torch_c.from_builtin_tensor"(%17308) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17310 = "util.global.load"() <{global = @"__auto.blk.3.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17311 = "torch_c.from_builtin_tensor"(%17310) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17312 = "util.global.load"() <{global = @"__auto.blk.3.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17313 = "torch_c.from_builtin_tensor"(%17312) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17314 = "util.global.load"() <{global = @"__auto.blk.3.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17315 = "torch_c.from_builtin_tensor"(%17314) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17316 = "util.global.load"() <{global = @"__auto.blk.3.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17317 = "torch_c.from_builtin_tensor"(%17316) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17318 = "util.global.load"() <{global = @"__auto.blk.3.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17319 = "torch_c.from_builtin_tensor"(%17318) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17320 = "util.global.load"() <{global = @"__auto.blk.3.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17321 = "torch_c.from_builtin_tensor"(%17320) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17322 = "util.global.load"() <{global = @"__auto.blk.3.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17323 = "torch_c.from_builtin_tensor"(%17322) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17324 = "util.global.load"() <{global = @"__auto.blk.3.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17325 = "torch_c.from_builtin_tensor"(%17324) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17326 = "util.global.load"() <{global = @"__auto.blk.3.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17327 = "torch_c.from_builtin_tensor"(%17326) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17328 = "util.global.load"() <{global = @__auto.blk.3.attn_scale}> : () -> tensor<f32>
%17329 = "torch_c.from_builtin_tensor"(%17328) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17330 = "util.global.load"() <{global = @"__auto.blk.3.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17331 = "torch_c.from_builtin_tensor"(%17330) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17332 = "util.global.load"() <{global = @"__auto.blk.3.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17333 = "torch_c.from_builtin_tensor"(%17332) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17334 = "util.global.load"() <{global = @__auto.blk.3.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17335 = "torch_c.from_builtin_tensor"(%17334) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17336 = "util.global.load"() <{global = @"__auto.blk.3.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17337 = "torch_c.from_builtin_tensor"(%17336) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17338 = "util.global.load"() <{global = @"__auto.blk.3.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17339 = "torch_c.from_builtin_tensor"(%17338) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17340 = "util.global.load"() <{global = @"__auto.blk.3.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17341 = "torch_c.from_builtin_tensor"(%17340) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17342 = "util.global.load"() <{global = @"__auto.blk.3.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17343 = "torch_c.from_builtin_tensor"(%17342) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17344 = "util.global.load"() <{global = @"__auto.blk.3.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17345 = "torch_c.from_builtin_tensor"(%17344) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17346 = "util.global.load"() <{global = @"__auto.blk.3.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17347 = "torch_c.from_builtin_tensor"(%17346) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17348 = "util.global.load"() <{global = @__auto.blk.4.attn_norm.weight}> : () -> tensor<4096xbf16>
%17349 = "torch_c.from_builtin_tensor"(%17348) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17350 = "util.global.load"() <{global = @"__auto.blk.4.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17351 = "torch_c.from_builtin_tensor"(%17350) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17352 = "util.global.load"() <{global = @"__auto.blk.4.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17353 = "torch_c.from_builtin_tensor"(%17352) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17354 = "util.global.load"() <{global = @"__auto.blk.4.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17355 = "torch_c.from_builtin_tensor"(%17354) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17356 = "util.global.load"() <{global = @"__auto.blk.4.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17357 = "torch_c.from_builtin_tensor"(%17356) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17358 = "util.global.load"() <{global = @"__auto.blk.4.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17359 = "torch_c.from_builtin_tensor"(%17358) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17360 = "util.global.load"() <{global = @"__auto.blk.4.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17361 = "torch_c.from_builtin_tensor"(%17360) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17362 = "util.global.load"() <{global = @"__auto.blk.4.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17363 = "torch_c.from_builtin_tensor"(%17362) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17364 = "util.global.load"() <{global = @"__auto.blk.4.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17365 = "torch_c.from_builtin_tensor"(%17364) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17366 = "util.global.load"() <{global = @"__auto.blk.4.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17367 = "torch_c.from_builtin_tensor"(%17366) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17368 = "util.global.load"() <{global = @__auto.blk.4.attn_scale}> : () -> tensor<f32>
%17369 = "torch_c.from_builtin_tensor"(%17368) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17370 = "util.global.load"() <{global = @"__auto.blk.4.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17371 = "torch_c.from_builtin_tensor"(%17370) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17372 = "util.global.load"() <{global = @"__auto.blk.4.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17373 = "torch_c.from_builtin_tensor"(%17372) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17374 = "util.global.load"() <{global = @__auto.blk.4.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17375 = "torch_c.from_builtin_tensor"(%17374) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17376 = "util.global.load"() <{global = @"__auto.blk.4.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17377 = "torch_c.from_builtin_tensor"(%17376) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17378 = "util.global.load"() <{global = @"__auto.blk.4.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17379 = "torch_c.from_builtin_tensor"(%17378) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17380 = "util.global.load"() <{global = @"__auto.blk.4.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17381 = "torch_c.from_builtin_tensor"(%17380) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17382 = "util.global.load"() <{global = @"__auto.blk.4.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17383 = "torch_c.from_builtin_tensor"(%17382) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17384 = "util.global.load"() <{global = @"__auto.blk.4.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17385 = "torch_c.from_builtin_tensor"(%17384) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17386 = "util.global.load"() <{global = @"__auto.blk.4.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17387 = "torch_c.from_builtin_tensor"(%17386) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17388 = "util.global.load"() <{global = @__auto.blk.5.attn_norm.weight}> : () -> tensor<4096xbf16>
%17389 = "torch_c.from_builtin_tensor"(%17388) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17390 = "util.global.load"() <{global = @"__auto.blk.5.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17391 = "torch_c.from_builtin_tensor"(%17390) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17392 = "util.global.load"() <{global = @"__auto.blk.5.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17393 = "torch_c.from_builtin_tensor"(%17392) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17394 = "util.global.load"() <{global = @"__auto.blk.5.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17395 = "torch_c.from_builtin_tensor"(%17394) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17396 = "util.global.load"() <{global = @"__auto.blk.5.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17397 = "torch_c.from_builtin_tensor"(%17396) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17398 = "util.global.load"() <{global = @"__auto.blk.5.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17399 = "torch_c.from_builtin_tensor"(%17398) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17400 = "util.global.load"() <{global = @"__auto.blk.5.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17401 = "torch_c.from_builtin_tensor"(%17400) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17402 = "util.global.load"() <{global = @"__auto.blk.5.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17403 = "torch_c.from_builtin_tensor"(%17402) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17404 = "util.global.load"() <{global = @"__auto.blk.5.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17405 = "torch_c.from_builtin_tensor"(%17404) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17406 = "util.global.load"() <{global = @"__auto.blk.5.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17407 = "torch_c.from_builtin_tensor"(%17406) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17408 = "util.global.load"() <{global = @__auto.blk.5.attn_scale}> : () -> tensor<f32>
%17409 = "torch_c.from_builtin_tensor"(%17408) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17410 = "util.global.load"() <{global = @"__auto.blk.5.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17411 = "torch_c.from_builtin_tensor"(%17410) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17412 = "util.global.load"() <{global = @"__auto.blk.5.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17413 = "torch_c.from_builtin_tensor"(%17412) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17414 = "util.global.load"() <{global = @__auto.blk.5.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17415 = "torch_c.from_builtin_tensor"(%17414) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17416 = "util.global.load"() <{global = @"__auto.blk.5.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17417 = "torch_c.from_builtin_tensor"(%17416) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17418 = "util.global.load"() <{global = @"__auto.blk.5.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17419 = "torch_c.from_builtin_tensor"(%17418) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17420 = "util.global.load"() <{global = @"__auto.blk.5.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17421 = "torch_c.from_builtin_tensor"(%17420) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17422 = "util.global.load"() <{global = @"__auto.blk.5.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17423 = "torch_c.from_builtin_tensor"(%17422) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17424 = "util.global.load"() <{global = @"__auto.blk.5.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17425 = "torch_c.from_builtin_tensor"(%17424) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17426 = "util.global.load"() <{global = @"__auto.blk.5.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17427 = "torch_c.from_builtin_tensor"(%17426) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17428 = "util.global.load"() <{global = @__auto.blk.6.attn_norm.weight}> : () -> tensor<4096xbf16>
%17429 = "torch_c.from_builtin_tensor"(%17428) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17430 = "util.global.load"() <{global = @"__auto.blk.6.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17431 = "torch_c.from_builtin_tensor"(%17430) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17432 = "util.global.load"() <{global = @"__auto.blk.6.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17433 = "torch_c.from_builtin_tensor"(%17432) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17434 = "util.global.load"() <{global = @"__auto.blk.6.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17435 = "torch_c.from_builtin_tensor"(%17434) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17436 = "util.global.load"() <{global = @"__auto.blk.6.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17437 = "torch_c.from_builtin_tensor"(%17436) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17438 = "util.global.load"() <{global = @"__auto.blk.6.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17439 = "torch_c.from_builtin_tensor"(%17438) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17440 = "util.global.load"() <{global = @"__auto.blk.6.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17441 = "torch_c.from_builtin_tensor"(%17440) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17442 = "util.global.load"() <{global = @"__auto.blk.6.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17443 = "torch_c.from_builtin_tensor"(%17442) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17444 = "util.global.load"() <{global = @"__auto.blk.6.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17445 = "torch_c.from_builtin_tensor"(%17444) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17446 = "util.global.load"() <{global = @"__auto.blk.6.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17447 = "torch_c.from_builtin_tensor"(%17446) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17448 = "util.global.load"() <{global = @__auto.blk.6.attn_scale}> : () -> tensor<f32>
%17449 = "torch_c.from_builtin_tensor"(%17448) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17450 = "util.global.load"() <{global = @"__auto.blk.6.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17451 = "torch_c.from_builtin_tensor"(%17450) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17452 = "util.global.load"() <{global = @"__auto.blk.6.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17453 = "torch_c.from_builtin_tensor"(%17452) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17454 = "util.global.load"() <{global = @__auto.blk.6.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17455 = "torch_c.from_builtin_tensor"(%17454) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17456 = "util.global.load"() <{global = @"__auto.blk.6.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17457 = "torch_c.from_builtin_tensor"(%17456) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17458 = "util.global.load"() <{global = @"__auto.blk.6.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17459 = "torch_c.from_builtin_tensor"(%17458) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17460 = "util.global.load"() <{global = @"__auto.blk.6.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17461 = "torch_c.from_builtin_tensor"(%17460) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17462 = "util.global.load"() <{global = @"__auto.blk.6.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17463 = "torch_c.from_builtin_tensor"(%17462) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17464 = "util.global.load"() <{global = @"__auto.blk.6.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17465 = "torch_c.from_builtin_tensor"(%17464) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17466 = "util.global.load"() <{global = @"__auto.blk.6.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17467 = "torch_c.from_builtin_tensor"(%17466) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17468 = "util.global.load"() <{global = @__auto.blk.7.attn_norm.weight}> : () -> tensor<4096xbf16>
%17469 = "torch_c.from_builtin_tensor"(%17468) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17470 = "util.global.load"() <{global = @"__auto.blk.7.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17471 = "torch_c.from_builtin_tensor"(%17470) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17472 = "util.global.load"() <{global = @"__auto.blk.7.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17473 = "torch_c.from_builtin_tensor"(%17472) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17474 = "util.global.load"() <{global = @"__auto.blk.7.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17475 = "torch_c.from_builtin_tensor"(%17474) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17476 = "util.global.load"() <{global = @"__auto.blk.7.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17477 = "torch_c.from_builtin_tensor"(%17476) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17478 = "util.global.load"() <{global = @"__auto.blk.7.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17479 = "torch_c.from_builtin_tensor"(%17478) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17480 = "util.global.load"() <{global = @"__auto.blk.7.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17481 = "torch_c.from_builtin_tensor"(%17480) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17482 = "util.global.load"() <{global = @"__auto.blk.7.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17483 = "torch_c.from_builtin_tensor"(%17482) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17484 = "util.global.load"() <{global = @"__auto.blk.7.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17485 = "torch_c.from_builtin_tensor"(%17484) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17486 = "util.global.load"() <{global = @"__auto.blk.7.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17487 = "torch_c.from_builtin_tensor"(%17486) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17488 = "util.global.load"() <{global = @__auto.blk.7.attn_scale}> : () -> tensor<f32>
%17489 = "torch_c.from_builtin_tensor"(%17488) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17490 = "util.global.load"() <{global = @"__auto.blk.7.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17491 = "torch_c.from_builtin_tensor"(%17490) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17492 = "util.global.load"() <{global = @"__auto.blk.7.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17493 = "torch_c.from_builtin_tensor"(%17492) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17494 = "util.global.load"() <{global = @__auto.blk.7.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17495 = "torch_c.from_builtin_tensor"(%17494) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17496 = "util.global.load"() <{global = @"__auto.blk.7.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17497 = "torch_c.from_builtin_tensor"(%17496) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17498 = "util.global.load"() <{global = @"__auto.blk.7.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17499 = "torch_c.from_builtin_tensor"(%17498) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17500 = "util.global.load"() <{global = @"__auto.blk.7.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17501 = "torch_c.from_builtin_tensor"(%17500) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17502 = "util.global.load"() <{global = @"__auto.blk.7.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17503 = "torch_c.from_builtin_tensor"(%17502) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17504 = "util.global.load"() <{global = @"__auto.blk.7.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17505 = "torch_c.from_builtin_tensor"(%17504) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17506 = "util.global.load"() <{global = @"__auto.blk.7.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17507 = "torch_c.from_builtin_tensor"(%17506) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17508 = "util.global.load"() <{global = @__auto.blk.8.attn_norm.weight}> : () -> tensor<4096xbf16>
%17509 = "torch_c.from_builtin_tensor"(%17508) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17510 = "util.global.load"() <{global = @"__auto.blk.8.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17511 = "torch_c.from_builtin_tensor"(%17510) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17512 = "util.global.load"() <{global = @"__auto.blk.8.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17513 = "torch_c.from_builtin_tensor"(%17512) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17514 = "util.global.load"() <{global = @"__auto.blk.8.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17515 = "torch_c.from_builtin_tensor"(%17514) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17516 = "util.global.load"() <{global = @"__auto.blk.8.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17517 = "torch_c.from_builtin_tensor"(%17516) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17518 = "util.global.load"() <{global = @"__auto.blk.8.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17519 = "torch_c.from_builtin_tensor"(%17518) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17520 = "util.global.load"() <{global = @"__auto.blk.8.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17521 = "torch_c.from_builtin_tensor"(%17520) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17522 = "util.global.load"() <{global = @"__auto.blk.8.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17523 = "torch_c.from_builtin_tensor"(%17522) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17524 = "util.global.load"() <{global = @"__auto.blk.8.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17525 = "torch_c.from_builtin_tensor"(%17524) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17526 = "util.global.load"() <{global = @"__auto.blk.8.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17527 = "torch_c.from_builtin_tensor"(%17526) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17528 = "util.global.load"() <{global = @__auto.blk.8.attn_scale}> : () -> tensor<f32>
%17529 = "torch_c.from_builtin_tensor"(%17528) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17530 = "util.global.load"() <{global = @"__auto.blk.8.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17531 = "torch_c.from_builtin_tensor"(%17530) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17532 = "util.global.load"() <{global = @"__auto.blk.8.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17533 = "torch_c.from_builtin_tensor"(%17532) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17534 = "util.global.load"() <{global = @__auto.blk.8.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17535 = "torch_c.from_builtin_tensor"(%17534) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17536 = "util.global.load"() <{global = @"__auto.blk.8.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17537 = "torch_c.from_builtin_tensor"(%17536) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17538 = "util.global.load"() <{global = @"__auto.blk.8.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17539 = "torch_c.from_builtin_tensor"(%17538) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17540 = "util.global.load"() <{global = @"__auto.blk.8.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17541 = "torch_c.from_builtin_tensor"(%17540) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17542 = "util.global.load"() <{global = @"__auto.blk.8.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17543 = "torch_c.from_builtin_tensor"(%17542) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17544 = "util.global.load"() <{global = @"__auto.blk.8.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17545 = "torch_c.from_builtin_tensor"(%17544) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17546 = "util.global.load"() <{global = @"__auto.blk.8.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17547 = "torch_c.from_builtin_tensor"(%17546) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17548 = "util.global.load"() <{global = @__auto.blk.9.attn_norm.weight}> : () -> tensor<4096xbf16>
%17549 = "torch_c.from_builtin_tensor"(%17548) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17550 = "util.global.load"() <{global = @"__auto.blk.9.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17551 = "torch_c.from_builtin_tensor"(%17550) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17552 = "util.global.load"() <{global = @"__auto.blk.9.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17553 = "torch_c.from_builtin_tensor"(%17552) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17554 = "util.global.load"() <{global = @"__auto.blk.9.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17555 = "torch_c.from_builtin_tensor"(%17554) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17556 = "util.global.load"() <{global = @"__auto.blk.9.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17557 = "torch_c.from_builtin_tensor"(%17556) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17558 = "util.global.load"() <{global = @"__auto.blk.9.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17559 = "torch_c.from_builtin_tensor"(%17558) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17560 = "util.global.load"() <{global = @"__auto.blk.9.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17561 = "torch_c.from_builtin_tensor"(%17560) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17562 = "util.global.load"() <{global = @"__auto.blk.9.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17563 = "torch_c.from_builtin_tensor"(%17562) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17564 = "util.global.load"() <{global = @"__auto.blk.9.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17565 = "torch_c.from_builtin_tensor"(%17564) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17566 = "util.global.load"() <{global = @"__auto.blk.9.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17567 = "torch_c.from_builtin_tensor"(%17566) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17568 = "util.global.load"() <{global = @__auto.blk.9.attn_scale}> : () -> tensor<f32>
%17569 = "torch_c.from_builtin_tensor"(%17568) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17570 = "util.global.load"() <{global = @"__auto.blk.9.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17571 = "torch_c.from_builtin_tensor"(%17570) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17572 = "util.global.load"() <{global = @"__auto.blk.9.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17573 = "torch_c.from_builtin_tensor"(%17572) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17574 = "util.global.load"() <{global = @__auto.blk.9.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17575 = "torch_c.from_builtin_tensor"(%17574) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17576 = "util.global.load"() <{global = @"__auto.blk.9.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17577 = "torch_c.from_builtin_tensor"(%17576) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17578 = "util.global.load"() <{global = @"__auto.blk.9.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17579 = "torch_c.from_builtin_tensor"(%17578) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17580 = "util.global.load"() <{global = @"__auto.blk.9.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17581 = "torch_c.from_builtin_tensor"(%17580) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17582 = "util.global.load"() <{global = @"__auto.blk.9.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17583 = "torch_c.from_builtin_tensor"(%17582) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17584 = "util.global.load"() <{global = @"__auto.blk.9.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17585 = "torch_c.from_builtin_tensor"(%17584) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17586 = "util.global.load"() <{global = @"__auto.blk.9.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17587 = "torch_c.from_builtin_tensor"(%17586) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17588 = "util.global.load"() <{global = @__auto.blk.10.attn_norm.weight}> : () -> tensor<4096xbf16>
%17589 = "torch_c.from_builtin_tensor"(%17588) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17590 = "util.global.load"() <{global = @"__auto.blk.10.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17591 = "torch_c.from_builtin_tensor"(%17590) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17592 = "util.global.load"() <{global = @"__auto.blk.10.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17593 = "torch_c.from_builtin_tensor"(%17592) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17594 = "util.global.load"() <{global = @"__auto.blk.10.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17595 = "torch_c.from_builtin_tensor"(%17594) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17596 = "util.global.load"() <{global = @"__auto.blk.10.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17597 = "torch_c.from_builtin_tensor"(%17596) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17598 = "util.global.load"() <{global = @"__auto.blk.10.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17599 = "torch_c.from_builtin_tensor"(%17598) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17600 = "util.global.load"() <{global = @"__auto.blk.10.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17601 = "torch_c.from_builtin_tensor"(%17600) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17602 = "util.global.load"() <{global = @"__auto.blk.10.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17603 = "torch_c.from_builtin_tensor"(%17602) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17604 = "util.global.load"() <{global = @"__auto.blk.10.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17605 = "torch_c.from_builtin_tensor"(%17604) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17606 = "util.global.load"() <{global = @"__auto.blk.10.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17607 = "torch_c.from_builtin_tensor"(%17606) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17608 = "util.global.load"() <{global = @__auto.blk.10.attn_scale}> : () -> tensor<f32>
%17609 = "torch_c.from_builtin_tensor"(%17608) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17610 = "util.global.load"() <{global = @"__auto.blk.10.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17611 = "torch_c.from_builtin_tensor"(%17610) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17612 = "util.global.load"() <{global = @"__auto.blk.10.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17613 = "torch_c.from_builtin_tensor"(%17612) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17614 = "util.global.load"() <{global = @__auto.blk.10.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17615 = "torch_c.from_builtin_tensor"(%17614) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17616 = "util.global.load"() <{global = @"__auto.blk.10.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17617 = "torch_c.from_builtin_tensor"(%17616) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17618 = "util.global.load"() <{global = @"__auto.blk.10.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17619 = "torch_c.from_builtin_tensor"(%17618) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17620 = "util.global.load"() <{global = @"__auto.blk.10.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17621 = "torch_c.from_builtin_tensor"(%17620) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17622 = "util.global.load"() <{global = @"__auto.blk.10.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17623 = "torch_c.from_builtin_tensor"(%17622) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17624 = "util.global.load"() <{global = @"__auto.blk.10.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17625 = "torch_c.from_builtin_tensor"(%17624) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17626 = "util.global.load"() <{global = @"__auto.blk.10.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17627 = "torch_c.from_builtin_tensor"(%17626) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17628 = "util.global.load"() <{global = @__auto.blk.11.attn_norm.weight}> : () -> tensor<4096xbf16>
%17629 = "torch_c.from_builtin_tensor"(%17628) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17630 = "util.global.load"() <{global = @"__auto.blk.11.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17631 = "torch_c.from_builtin_tensor"(%17630) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17632 = "util.global.load"() <{global = @"__auto.blk.11.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17633 = "torch_c.from_builtin_tensor"(%17632) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17634 = "util.global.load"() <{global = @"__auto.blk.11.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17635 = "torch_c.from_builtin_tensor"(%17634) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17636 = "util.global.load"() <{global = @"__auto.blk.11.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17637 = "torch_c.from_builtin_tensor"(%17636) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17638 = "util.global.load"() <{global = @"__auto.blk.11.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17639 = "torch_c.from_builtin_tensor"(%17638) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17640 = "util.global.load"() <{global = @"__auto.blk.11.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17641 = "torch_c.from_builtin_tensor"(%17640) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17642 = "util.global.load"() <{global = @"__auto.blk.11.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17643 = "torch_c.from_builtin_tensor"(%17642) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17644 = "util.global.load"() <{global = @"__auto.blk.11.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17645 = "torch_c.from_builtin_tensor"(%17644) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17646 = "util.global.load"() <{global = @"__auto.blk.11.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17647 = "torch_c.from_builtin_tensor"(%17646) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17648 = "util.global.load"() <{global = @__auto.blk.11.attn_scale}> : () -> tensor<f32>
%17649 = "torch_c.from_builtin_tensor"(%17648) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17650 = "util.global.load"() <{global = @"__auto.blk.11.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17651 = "torch_c.from_builtin_tensor"(%17650) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17652 = "util.global.load"() <{global = @"__auto.blk.11.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17653 = "torch_c.from_builtin_tensor"(%17652) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17654 = "util.global.load"() <{global = @__auto.blk.11.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17655 = "torch_c.from_builtin_tensor"(%17654) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17656 = "util.global.load"() <{global = @"__auto.blk.11.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17657 = "torch_c.from_builtin_tensor"(%17656) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17658 = "util.global.load"() <{global = @"__auto.blk.11.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17659 = "torch_c.from_builtin_tensor"(%17658) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17660 = "util.global.load"() <{global = @"__auto.blk.11.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17661 = "torch_c.from_builtin_tensor"(%17660) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17662 = "util.global.load"() <{global = @"__auto.blk.11.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17663 = "torch_c.from_builtin_tensor"(%17662) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17664 = "util.global.load"() <{global = @"__auto.blk.11.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17665 = "torch_c.from_builtin_tensor"(%17664) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17666 = "util.global.load"() <{global = @"__auto.blk.11.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17667 = "torch_c.from_builtin_tensor"(%17666) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17668 = "util.global.load"() <{global = @__auto.blk.12.attn_norm.weight}> : () -> tensor<4096xbf16>
%17669 = "torch_c.from_builtin_tensor"(%17668) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17670 = "util.global.load"() <{global = @"__auto.blk.12.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17671 = "torch_c.from_builtin_tensor"(%17670) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17672 = "util.global.load"() <{global = @"__auto.blk.12.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17673 = "torch_c.from_builtin_tensor"(%17672) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17674 = "util.global.load"() <{global = @"__auto.blk.12.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17675 = "torch_c.from_builtin_tensor"(%17674) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17676 = "util.global.load"() <{global = @"__auto.blk.12.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17677 = "torch_c.from_builtin_tensor"(%17676) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17678 = "util.global.load"() <{global = @"__auto.blk.12.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17679 = "torch_c.from_builtin_tensor"(%17678) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17680 = "util.global.load"() <{global = @"__auto.blk.12.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17681 = "torch_c.from_builtin_tensor"(%17680) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17682 = "util.global.load"() <{global = @"__auto.blk.12.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17683 = "torch_c.from_builtin_tensor"(%17682) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17684 = "util.global.load"() <{global = @"__auto.blk.12.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17685 = "torch_c.from_builtin_tensor"(%17684) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17686 = "util.global.load"() <{global = @"__auto.blk.12.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17687 = "torch_c.from_builtin_tensor"(%17686) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17688 = "util.global.load"() <{global = @__auto.blk.12.attn_scale}> : () -> tensor<f32>
%17689 = "torch_c.from_builtin_tensor"(%17688) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17690 = "util.global.load"() <{global = @"__auto.blk.12.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17691 = "torch_c.from_builtin_tensor"(%17690) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17692 = "util.global.load"() <{global = @"__auto.blk.12.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17693 = "torch_c.from_builtin_tensor"(%17692) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17694 = "util.global.load"() <{global = @__auto.blk.12.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17695 = "torch_c.from_builtin_tensor"(%17694) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17696 = "util.global.load"() <{global = @"__auto.blk.12.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17697 = "torch_c.from_builtin_tensor"(%17696) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17698 = "util.global.load"() <{global = @"__auto.blk.12.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17699 = "torch_c.from_builtin_tensor"(%17698) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17700 = "util.global.load"() <{global = @"__auto.blk.12.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17701 = "torch_c.from_builtin_tensor"(%17700) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17702 = "util.global.load"() <{global = @"__auto.blk.12.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17703 = "torch_c.from_builtin_tensor"(%17702) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17704 = "util.global.load"() <{global = @"__auto.blk.12.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17705 = "torch_c.from_builtin_tensor"(%17704) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17706 = "util.global.load"() <{global = @"__auto.blk.12.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17707 = "torch_c.from_builtin_tensor"(%17706) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17708 = "util.global.load"() <{global = @__auto.blk.13.attn_norm.weight}> : () -> tensor<4096xbf16>
%17709 = "torch_c.from_builtin_tensor"(%17708) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17710 = "util.global.load"() <{global = @"__auto.blk.13.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17711 = "torch_c.from_builtin_tensor"(%17710) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17712 = "util.global.load"() <{global = @"__auto.blk.13.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17713 = "torch_c.from_builtin_tensor"(%17712) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17714 = "util.global.load"() <{global = @"__auto.blk.13.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17715 = "torch_c.from_builtin_tensor"(%17714) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17716 = "util.global.load"() <{global = @"__auto.blk.13.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17717 = "torch_c.from_builtin_tensor"(%17716) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17718 = "util.global.load"() <{global = @"__auto.blk.13.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17719 = "torch_c.from_builtin_tensor"(%17718) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17720 = "util.global.load"() <{global = @"__auto.blk.13.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17721 = "torch_c.from_builtin_tensor"(%17720) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17722 = "util.global.load"() <{global = @"__auto.blk.13.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17723 = "torch_c.from_builtin_tensor"(%17722) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17724 = "util.global.load"() <{global = @"__auto.blk.13.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17725 = "torch_c.from_builtin_tensor"(%17724) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17726 = "util.global.load"() <{global = @"__auto.blk.13.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17727 = "torch_c.from_builtin_tensor"(%17726) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17728 = "util.global.load"() <{global = @__auto.blk.13.attn_scale}> : () -> tensor<f32>
%17729 = "torch_c.from_builtin_tensor"(%17728) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17730 = "util.global.load"() <{global = @"__auto.blk.13.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17731 = "torch_c.from_builtin_tensor"(%17730) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17732 = "util.global.load"() <{global = @"__auto.blk.13.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17733 = "torch_c.from_builtin_tensor"(%17732) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17734 = "util.global.load"() <{global = @__auto.blk.13.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17735 = "torch_c.from_builtin_tensor"(%17734) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17736 = "util.global.load"() <{global = @"__auto.blk.13.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17737 = "torch_c.from_builtin_tensor"(%17736) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17738 = "util.global.load"() <{global = @"__auto.blk.13.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17739 = "torch_c.from_builtin_tensor"(%17738) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17740 = "util.global.load"() <{global = @"__auto.blk.13.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17741 = "torch_c.from_builtin_tensor"(%17740) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17742 = "util.global.load"() <{global = @"__auto.blk.13.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17743 = "torch_c.from_builtin_tensor"(%17742) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17744 = "util.global.load"() <{global = @"__auto.blk.13.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17745 = "torch_c.from_builtin_tensor"(%17744) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17746 = "util.global.load"() <{global = @"__auto.blk.13.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17747 = "torch_c.from_builtin_tensor"(%17746) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17748 = "util.global.load"() <{global = @__auto.blk.14.attn_norm.weight}> : () -> tensor<4096xbf16>
%17749 = "torch_c.from_builtin_tensor"(%17748) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17750 = "util.global.load"() <{global = @"__auto.blk.14.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17751 = "torch_c.from_builtin_tensor"(%17750) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17752 = "util.global.load"() <{global = @"__auto.blk.14.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17753 = "torch_c.from_builtin_tensor"(%17752) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17754 = "util.global.load"() <{global = @"__auto.blk.14.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17755 = "torch_c.from_builtin_tensor"(%17754) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17756 = "util.global.load"() <{global = @"__auto.blk.14.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17757 = "torch_c.from_builtin_tensor"(%17756) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17758 = "util.global.load"() <{global = @"__auto.blk.14.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17759 = "torch_c.from_builtin_tensor"(%17758) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17760 = "util.global.load"() <{global = @"__auto.blk.14.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17761 = "torch_c.from_builtin_tensor"(%17760) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17762 = "util.global.load"() <{global = @"__auto.blk.14.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17763 = "torch_c.from_builtin_tensor"(%17762) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17764 = "util.global.load"() <{global = @"__auto.blk.14.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17765 = "torch_c.from_builtin_tensor"(%17764) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17766 = "util.global.load"() <{global = @"__auto.blk.14.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17767 = "torch_c.from_builtin_tensor"(%17766) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17768 = "util.global.load"() <{global = @__auto.blk.14.attn_scale}> : () -> tensor<f32>
%17769 = "torch_c.from_builtin_tensor"(%17768) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17770 = "util.global.load"() <{global = @"__auto.blk.14.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17771 = "torch_c.from_builtin_tensor"(%17770) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17772 = "util.global.load"() <{global = @"__auto.blk.14.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17773 = "torch_c.from_builtin_tensor"(%17772) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17774 = "util.global.load"() <{global = @__auto.blk.14.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17775 = "torch_c.from_builtin_tensor"(%17774) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17776 = "util.global.load"() <{global = @"__auto.blk.14.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17777 = "torch_c.from_builtin_tensor"(%17776) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17778 = "util.global.load"() <{global = @"__auto.blk.14.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17779 = "torch_c.from_builtin_tensor"(%17778) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17780 = "util.global.load"() <{global = @"__auto.blk.14.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17781 = "torch_c.from_builtin_tensor"(%17780) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17782 = "util.global.load"() <{global = @"__auto.blk.14.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17783 = "torch_c.from_builtin_tensor"(%17782) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17784 = "util.global.load"() <{global = @"__auto.blk.14.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17785 = "torch_c.from_builtin_tensor"(%17784) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17786 = "util.global.load"() <{global = @"__auto.blk.14.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17787 = "torch_c.from_builtin_tensor"(%17786) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17788 = "util.global.load"() <{global = @__auto.blk.15.attn_norm.weight}> : () -> tensor<4096xbf16>
%17789 = "torch_c.from_builtin_tensor"(%17788) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17790 = "util.global.load"() <{global = @"__auto.blk.15.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17791 = "torch_c.from_builtin_tensor"(%17790) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17792 = "util.global.load"() <{global = @"__auto.blk.15.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17793 = "torch_c.from_builtin_tensor"(%17792) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17794 = "util.global.load"() <{global = @"__auto.blk.15.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17795 = "torch_c.from_builtin_tensor"(%17794) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17796 = "util.global.load"() <{global = @"__auto.blk.15.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17797 = "torch_c.from_builtin_tensor"(%17796) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17798 = "util.global.load"() <{global = @"__auto.blk.15.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17799 = "torch_c.from_builtin_tensor"(%17798) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17800 = "util.global.load"() <{global = @"__auto.blk.15.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17801 = "torch_c.from_builtin_tensor"(%17800) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17802 = "util.global.load"() <{global = @"__auto.blk.15.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17803 = "torch_c.from_builtin_tensor"(%17802) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17804 = "util.global.load"() <{global = @"__auto.blk.15.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17805 = "torch_c.from_builtin_tensor"(%17804) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17806 = "util.global.load"() <{global = @"__auto.blk.15.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17807 = "torch_c.from_builtin_tensor"(%17806) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17808 = "util.global.load"() <{global = @__auto.blk.15.attn_scale}> : () -> tensor<f32>
%17809 = "torch_c.from_builtin_tensor"(%17808) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17810 = "util.global.load"() <{global = @"__auto.blk.15.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17811 = "torch_c.from_builtin_tensor"(%17810) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17812 = "util.global.load"() <{global = @"__auto.blk.15.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17813 = "torch_c.from_builtin_tensor"(%17812) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17814 = "util.global.load"() <{global = @__auto.blk.15.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17815 = "torch_c.from_builtin_tensor"(%17814) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17816 = "util.global.load"() <{global = @"__auto.blk.15.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17817 = "torch_c.from_builtin_tensor"(%17816) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17818 = "util.global.load"() <{global = @"__auto.blk.15.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17819 = "torch_c.from_builtin_tensor"(%17818) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17820 = "util.global.load"() <{global = @"__auto.blk.15.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17821 = "torch_c.from_builtin_tensor"(%17820) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17822 = "util.global.load"() <{global = @"__auto.blk.15.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17823 = "torch_c.from_builtin_tensor"(%17822) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17824 = "util.global.load"() <{global = @"__auto.blk.15.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17825 = "torch_c.from_builtin_tensor"(%17824) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17826 = "util.global.load"() <{global = @"__auto.blk.15.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17827 = "torch_c.from_builtin_tensor"(%17826) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17828 = "util.global.load"() <{global = @__auto.blk.16.attn_norm.weight}> : () -> tensor<4096xbf16>
%17829 = "torch_c.from_builtin_tensor"(%17828) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17830 = "util.global.load"() <{global = @"__auto.blk.16.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17831 = "torch_c.from_builtin_tensor"(%17830) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17832 = "util.global.load"() <{global = @"__auto.blk.16.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17833 = "torch_c.from_builtin_tensor"(%17832) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17834 = "util.global.load"() <{global = @"__auto.blk.16.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17835 = "torch_c.from_builtin_tensor"(%17834) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17836 = "util.global.load"() <{global = @"__auto.blk.16.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17837 = "torch_c.from_builtin_tensor"(%17836) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17838 = "util.global.load"() <{global = @"__auto.blk.16.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17839 = "torch_c.from_builtin_tensor"(%17838) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17840 = "util.global.load"() <{global = @"__auto.blk.16.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17841 = "torch_c.from_builtin_tensor"(%17840) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17842 = "util.global.load"() <{global = @"__auto.blk.16.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17843 = "torch_c.from_builtin_tensor"(%17842) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17844 = "util.global.load"() <{global = @"__auto.blk.16.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17845 = "torch_c.from_builtin_tensor"(%17844) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17846 = "util.global.load"() <{global = @"__auto.blk.16.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17847 = "torch_c.from_builtin_tensor"(%17846) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17848 = "util.global.load"() <{global = @__auto.blk.16.attn_scale}> : () -> tensor<f32>
%17849 = "torch_c.from_builtin_tensor"(%17848) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17850 = "util.global.load"() <{global = @"__auto.blk.16.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17851 = "torch_c.from_builtin_tensor"(%17850) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17852 = "util.global.load"() <{global = @"__auto.blk.16.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17853 = "torch_c.from_builtin_tensor"(%17852) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17854 = "util.global.load"() <{global = @__auto.blk.16.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17855 = "torch_c.from_builtin_tensor"(%17854) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17856 = "util.global.load"() <{global = @"__auto.blk.16.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17857 = "torch_c.from_builtin_tensor"(%17856) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17858 = "util.global.load"() <{global = @"__auto.blk.16.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17859 = "torch_c.from_builtin_tensor"(%17858) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17860 = "util.global.load"() <{global = @"__auto.blk.16.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17861 = "torch_c.from_builtin_tensor"(%17860) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17862 = "util.global.load"() <{global = @"__auto.blk.16.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17863 = "torch_c.from_builtin_tensor"(%17862) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17864 = "util.global.load"() <{global = @"__auto.blk.16.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17865 = "torch_c.from_builtin_tensor"(%17864) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17866 = "util.global.load"() <{global = @"__auto.blk.16.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17867 = "torch_c.from_builtin_tensor"(%17866) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17868 = "util.global.load"() <{global = @__auto.blk.17.attn_norm.weight}> : () -> tensor<4096xbf16>
%17869 = "torch_c.from_builtin_tensor"(%17868) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17870 = "util.global.load"() <{global = @"__auto.blk.17.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17871 = "torch_c.from_builtin_tensor"(%17870) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17872 = "util.global.load"() <{global = @"__auto.blk.17.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17873 = "torch_c.from_builtin_tensor"(%17872) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17874 = "util.global.load"() <{global = @"__auto.blk.17.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17875 = "torch_c.from_builtin_tensor"(%17874) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17876 = "util.global.load"() <{global = @"__auto.blk.17.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17877 = "torch_c.from_builtin_tensor"(%17876) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17878 = "util.global.load"() <{global = @"__auto.blk.17.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17879 = "torch_c.from_builtin_tensor"(%17878) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17880 = "util.global.load"() <{global = @"__auto.blk.17.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17881 = "torch_c.from_builtin_tensor"(%17880) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17882 = "util.global.load"() <{global = @"__auto.blk.17.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17883 = "torch_c.from_builtin_tensor"(%17882) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17884 = "util.global.load"() <{global = @"__auto.blk.17.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17885 = "torch_c.from_builtin_tensor"(%17884) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17886 = "util.global.load"() <{global = @"__auto.blk.17.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17887 = "torch_c.from_builtin_tensor"(%17886) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17888 = "util.global.load"() <{global = @__auto.blk.17.attn_scale}> : () -> tensor<f32>
%17889 = "torch_c.from_builtin_tensor"(%17888) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17890 = "util.global.load"() <{global = @"__auto.blk.17.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17891 = "torch_c.from_builtin_tensor"(%17890) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17892 = "util.global.load"() <{global = @"__auto.blk.17.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17893 = "torch_c.from_builtin_tensor"(%17892) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17894 = "util.global.load"() <{global = @__auto.blk.17.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17895 = "torch_c.from_builtin_tensor"(%17894) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17896 = "util.global.load"() <{global = @"__auto.blk.17.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17897 = "torch_c.from_builtin_tensor"(%17896) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17898 = "util.global.load"() <{global = @"__auto.blk.17.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17899 = "torch_c.from_builtin_tensor"(%17898) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17900 = "util.global.load"() <{global = @"__auto.blk.17.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17901 = "torch_c.from_builtin_tensor"(%17900) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17902 = "util.global.load"() <{global = @"__auto.blk.17.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17903 = "torch_c.from_builtin_tensor"(%17902) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17904 = "util.global.load"() <{global = @"__auto.blk.17.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17905 = "torch_c.from_builtin_tensor"(%17904) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17906 = "util.global.load"() <{global = @"__auto.blk.17.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17907 = "torch_c.from_builtin_tensor"(%17906) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17908 = "util.global.load"() <{global = @__auto.blk.18.attn_norm.weight}> : () -> tensor<4096xbf16>
%17909 = "torch_c.from_builtin_tensor"(%17908) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17910 = "util.global.load"() <{global = @"__auto.blk.18.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17911 = "torch_c.from_builtin_tensor"(%17910) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17912 = "util.global.load"() <{global = @"__auto.blk.18.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17913 = "torch_c.from_builtin_tensor"(%17912) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17914 = "util.global.load"() <{global = @"__auto.blk.18.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17915 = "torch_c.from_builtin_tensor"(%17914) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17916 = "util.global.load"() <{global = @"__auto.blk.18.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17917 = "torch_c.from_builtin_tensor"(%17916) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17918 = "util.global.load"() <{global = @"__auto.blk.18.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17919 = "torch_c.from_builtin_tensor"(%17918) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17920 = "util.global.load"() <{global = @"__auto.blk.18.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17921 = "torch_c.from_builtin_tensor"(%17920) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17922 = "util.global.load"() <{global = @"__auto.blk.18.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17923 = "torch_c.from_builtin_tensor"(%17922) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17924 = "util.global.load"() <{global = @"__auto.blk.18.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17925 = "torch_c.from_builtin_tensor"(%17924) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17926 = "util.global.load"() <{global = @"__auto.blk.18.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17927 = "torch_c.from_builtin_tensor"(%17926) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17928 = "util.global.load"() <{global = @__auto.blk.18.attn_scale}> : () -> tensor<f32>
%17929 = "torch_c.from_builtin_tensor"(%17928) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17930 = "util.global.load"() <{global = @"__auto.blk.18.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17931 = "torch_c.from_builtin_tensor"(%17930) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17932 = "util.global.load"() <{global = @"__auto.blk.18.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17933 = "torch_c.from_builtin_tensor"(%17932) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17934 = "util.global.load"() <{global = @__auto.blk.18.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17935 = "torch_c.from_builtin_tensor"(%17934) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17936 = "util.global.load"() <{global = @"__auto.blk.18.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17937 = "torch_c.from_builtin_tensor"(%17936) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17938 = "util.global.load"() <{global = @"__auto.blk.18.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17939 = "torch_c.from_builtin_tensor"(%17938) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17940 = "util.global.load"() <{global = @"__auto.blk.18.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17941 = "torch_c.from_builtin_tensor"(%17940) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17942 = "util.global.load"() <{global = @"__auto.blk.18.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17943 = "torch_c.from_builtin_tensor"(%17942) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17944 = "util.global.load"() <{global = @"__auto.blk.18.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17945 = "torch_c.from_builtin_tensor"(%17944) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17946 = "util.global.load"() <{global = @"__auto.blk.18.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17947 = "torch_c.from_builtin_tensor"(%17946) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17948 = "util.global.load"() <{global = @__auto.blk.19.attn_norm.weight}> : () -> tensor<4096xbf16>
%17949 = "torch_c.from_builtin_tensor"(%17948) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17950 = "util.global.load"() <{global = @"__auto.blk.19.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17951 = "torch_c.from_builtin_tensor"(%17950) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17952 = "util.global.load"() <{global = @"__auto.blk.19.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17953 = "torch_c.from_builtin_tensor"(%17952) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17954 = "util.global.load"() <{global = @"__auto.blk.19.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17955 = "torch_c.from_builtin_tensor"(%17954) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17956 = "util.global.load"() <{global = @"__auto.blk.19.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17957 = "torch_c.from_builtin_tensor"(%17956) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17958 = "util.global.load"() <{global = @"__auto.blk.19.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17959 = "torch_c.from_builtin_tensor"(%17958) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17960 = "util.global.load"() <{global = @"__auto.blk.19.attn_k.q_output:rscale"}> : () -> tensor<f32>
%17961 = "torch_c.from_builtin_tensor"(%17960) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17962 = "util.global.load"() <{global = @"__auto.blk.19.attn_v.q_input:rscale"}> : () -> tensor<f32>
%17963 = "torch_c.from_builtin_tensor"(%17962) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17964 = "util.global.load"() <{global = @"__auto.blk.19.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17965 = "torch_c.from_builtin_tensor"(%17964) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%17966 = "util.global.load"() <{global = @"__auto.blk.19.attn_v.q_output:rscale"}> : () -> tensor<f32>
%17967 = "torch_c.from_builtin_tensor"(%17966) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17968 = "util.global.load"() <{global = @__auto.blk.19.attn_scale}> : () -> tensor<f32>
%17969 = "torch_c.from_builtin_tensor"(%17968) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17970 = "util.global.load"() <{global = @"__auto.blk.19.attn_output.q_input:rscale"}> : () -> tensor<f32>
%17971 = "torch_c.from_builtin_tensor"(%17970) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17972 = "util.global.load"() <{global = @"__auto.blk.19.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17973 = "torch_c.from_builtin_tensor"(%17972) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17974 = "util.global.load"() <{global = @__auto.blk.19.ffn_norm.weight}> : () -> tensor<4096xbf16>
%17975 = "torch_c.from_builtin_tensor"(%17974) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17976 = "util.global.load"() <{global = @"__auto.blk.19.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%17977 = "torch_c.from_builtin_tensor"(%17976) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17978 = "util.global.load"() <{global = @"__auto.blk.19.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17979 = "torch_c.from_builtin_tensor"(%17978) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17980 = "util.global.load"() <{global = @"__auto.blk.19.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%17981 = "torch_c.from_builtin_tensor"(%17980) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17982 = "util.global.load"() <{global = @"__auto.blk.19.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%17983 = "torch_c.from_builtin_tensor"(%17982) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%17984 = "util.global.load"() <{global = @"__auto.blk.19.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%17985 = "torch_c.from_builtin_tensor"(%17984) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17986 = "util.global.load"() <{global = @"__auto.blk.19.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%17987 = "torch_c.from_builtin_tensor"(%17986) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%17988 = "util.global.load"() <{global = @__auto.blk.20.attn_norm.weight}> : () -> tensor<4096xbf16>
%17989 = "torch_c.from_builtin_tensor"(%17988) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%17990 = "util.global.load"() <{global = @"__auto.blk.20.attn_q.q_input:rscale"}> : () -> tensor<f32>
%17991 = "torch_c.from_builtin_tensor"(%17990) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17992 = "util.global.load"() <{global = @"__auto.blk.20.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%17993 = "torch_c.from_builtin_tensor"(%17992) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%17994 = "util.global.load"() <{global = @"__auto.blk.20.attn_q.q_output:rscale"}> : () -> tensor<f32>
%17995 = "torch_c.from_builtin_tensor"(%17994) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17996 = "util.global.load"() <{global = @"__auto.blk.20.attn_k.q_input:rscale"}> : () -> tensor<f32>
%17997 = "torch_c.from_builtin_tensor"(%17996) : (tensor<f32>) -> !torch.vtensor<[],f32>
%17998 = "util.global.load"() <{global = @"__auto.blk.20.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%17999 = "torch_c.from_builtin_tensor"(%17998) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18000 = "util.global.load"() <{global = @"__auto.blk.20.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18001 = "torch_c.from_builtin_tensor"(%18000) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18002 = "util.global.load"() <{global = @"__auto.blk.20.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18003 = "torch_c.from_builtin_tensor"(%18002) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18004 = "util.global.load"() <{global = @"__auto.blk.20.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18005 = "torch_c.from_builtin_tensor"(%18004) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18006 = "util.global.load"() <{global = @"__auto.blk.20.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18007 = "torch_c.from_builtin_tensor"(%18006) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18008 = "util.global.load"() <{global = @__auto.blk.20.attn_scale}> : () -> tensor<f32>
%18009 = "torch_c.from_builtin_tensor"(%18008) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18010 = "util.global.load"() <{global = @"__auto.blk.20.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18011 = "torch_c.from_builtin_tensor"(%18010) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18012 = "util.global.load"() <{global = @"__auto.blk.20.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18013 = "torch_c.from_builtin_tensor"(%18012) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18014 = "util.global.load"() <{global = @__auto.blk.20.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18015 = "torch_c.from_builtin_tensor"(%18014) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18016 = "util.global.load"() <{global = @"__auto.blk.20.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18017 = "torch_c.from_builtin_tensor"(%18016) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18018 = "util.global.load"() <{global = @"__auto.blk.20.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18019 = "torch_c.from_builtin_tensor"(%18018) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18020 = "util.global.load"() <{global = @"__auto.blk.20.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18021 = "torch_c.from_builtin_tensor"(%18020) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18022 = "util.global.load"() <{global = @"__auto.blk.20.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18023 = "torch_c.from_builtin_tensor"(%18022) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18024 = "util.global.load"() <{global = @"__auto.blk.20.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18025 = "torch_c.from_builtin_tensor"(%18024) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18026 = "util.global.load"() <{global = @"__auto.blk.20.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18027 = "torch_c.from_builtin_tensor"(%18026) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18028 = "util.global.load"() <{global = @__auto.blk.21.attn_norm.weight}> : () -> tensor<4096xbf16>
%18029 = "torch_c.from_builtin_tensor"(%18028) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18030 = "util.global.load"() <{global = @"__auto.blk.21.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18031 = "torch_c.from_builtin_tensor"(%18030) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18032 = "util.global.load"() <{global = @"__auto.blk.21.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18033 = "torch_c.from_builtin_tensor"(%18032) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18034 = "util.global.load"() <{global = @"__auto.blk.21.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18035 = "torch_c.from_builtin_tensor"(%18034) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18036 = "util.global.load"() <{global = @"__auto.blk.21.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18037 = "torch_c.from_builtin_tensor"(%18036) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18038 = "util.global.load"() <{global = @"__auto.blk.21.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18039 = "torch_c.from_builtin_tensor"(%18038) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18040 = "util.global.load"() <{global = @"__auto.blk.21.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18041 = "torch_c.from_builtin_tensor"(%18040) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18042 = "util.global.load"() <{global = @"__auto.blk.21.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18043 = "torch_c.from_builtin_tensor"(%18042) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18044 = "util.global.load"() <{global = @"__auto.blk.21.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18045 = "torch_c.from_builtin_tensor"(%18044) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18046 = "util.global.load"() <{global = @"__auto.blk.21.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18047 = "torch_c.from_builtin_tensor"(%18046) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18048 = "util.global.load"() <{global = @__auto.blk.21.attn_scale}> : () -> tensor<f32>
%18049 = "torch_c.from_builtin_tensor"(%18048) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18050 = "util.global.load"() <{global = @"__auto.blk.21.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18051 = "torch_c.from_builtin_tensor"(%18050) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18052 = "util.global.load"() <{global = @"__auto.blk.21.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18053 = "torch_c.from_builtin_tensor"(%18052) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18054 = "util.global.load"() <{global = @__auto.blk.21.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18055 = "torch_c.from_builtin_tensor"(%18054) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18056 = "util.global.load"() <{global = @"__auto.blk.21.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18057 = "torch_c.from_builtin_tensor"(%18056) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18058 = "util.global.load"() <{global = @"__auto.blk.21.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18059 = "torch_c.from_builtin_tensor"(%18058) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18060 = "util.global.load"() <{global = @"__auto.blk.21.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18061 = "torch_c.from_builtin_tensor"(%18060) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18062 = "util.global.load"() <{global = @"__auto.blk.21.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18063 = "torch_c.from_builtin_tensor"(%18062) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18064 = "util.global.load"() <{global = @"__auto.blk.21.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18065 = "torch_c.from_builtin_tensor"(%18064) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18066 = "util.global.load"() <{global = @"__auto.blk.21.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18067 = "torch_c.from_builtin_tensor"(%18066) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18068 = "util.global.load"() <{global = @__auto.blk.22.attn_norm.weight}> : () -> tensor<4096xbf16>
%18069 = "torch_c.from_builtin_tensor"(%18068) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18070 = "util.global.load"() <{global = @"__auto.blk.22.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18071 = "torch_c.from_builtin_tensor"(%18070) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18072 = "util.global.load"() <{global = @"__auto.blk.22.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18073 = "torch_c.from_builtin_tensor"(%18072) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18074 = "util.global.load"() <{global = @"__auto.blk.22.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18075 = "torch_c.from_builtin_tensor"(%18074) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18076 = "util.global.load"() <{global = @"__auto.blk.22.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18077 = "torch_c.from_builtin_tensor"(%18076) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18078 = "util.global.load"() <{global = @"__auto.blk.22.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18079 = "torch_c.from_builtin_tensor"(%18078) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18080 = "util.global.load"() <{global = @"__auto.blk.22.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18081 = "torch_c.from_builtin_tensor"(%18080) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18082 = "util.global.load"() <{global = @"__auto.blk.22.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18083 = "torch_c.from_builtin_tensor"(%18082) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18084 = "util.global.load"() <{global = @"__auto.blk.22.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18085 = "torch_c.from_builtin_tensor"(%18084) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18086 = "util.global.load"() <{global = @"__auto.blk.22.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18087 = "torch_c.from_builtin_tensor"(%18086) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18088 = "util.global.load"() <{global = @__auto.blk.22.attn_scale}> : () -> tensor<f32>
%18089 = "torch_c.from_builtin_tensor"(%18088) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18090 = "util.global.load"() <{global = @"__auto.blk.22.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18091 = "torch_c.from_builtin_tensor"(%18090) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18092 = "util.global.load"() <{global = @"__auto.blk.22.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18093 = "torch_c.from_builtin_tensor"(%18092) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18094 = "util.global.load"() <{global = @__auto.blk.22.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18095 = "torch_c.from_builtin_tensor"(%18094) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18096 = "util.global.load"() <{global = @"__auto.blk.22.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18097 = "torch_c.from_builtin_tensor"(%18096) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18098 = "util.global.load"() <{global = @"__auto.blk.22.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18099 = "torch_c.from_builtin_tensor"(%18098) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18100 = "util.global.load"() <{global = @"__auto.blk.22.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18101 = "torch_c.from_builtin_tensor"(%18100) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18102 = "util.global.load"() <{global = @"__auto.blk.22.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18103 = "torch_c.from_builtin_tensor"(%18102) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18104 = "util.global.load"() <{global = @"__auto.blk.22.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18105 = "torch_c.from_builtin_tensor"(%18104) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18106 = "util.global.load"() <{global = @"__auto.blk.22.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18107 = "torch_c.from_builtin_tensor"(%18106) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18108 = "util.global.load"() <{global = @__auto.blk.23.attn_norm.weight}> : () -> tensor<4096xbf16>
%18109 = "torch_c.from_builtin_tensor"(%18108) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18110 = "util.global.load"() <{global = @"__auto.blk.23.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18111 = "torch_c.from_builtin_tensor"(%18110) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18112 = "util.global.load"() <{global = @"__auto.blk.23.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18113 = "torch_c.from_builtin_tensor"(%18112) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18114 = "util.global.load"() <{global = @"__auto.blk.23.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18115 = "torch_c.from_builtin_tensor"(%18114) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18116 = "util.global.load"() <{global = @"__auto.blk.23.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18117 = "torch_c.from_builtin_tensor"(%18116) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18118 = "util.global.load"() <{global = @"__auto.blk.23.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18119 = "torch_c.from_builtin_tensor"(%18118) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18120 = "util.global.load"() <{global = @"__auto.blk.23.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18121 = "torch_c.from_builtin_tensor"(%18120) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18122 = "util.global.load"() <{global = @"__auto.blk.23.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18123 = "torch_c.from_builtin_tensor"(%18122) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18124 = "util.global.load"() <{global = @"__auto.blk.23.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18125 = "torch_c.from_builtin_tensor"(%18124) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18126 = "util.global.load"() <{global = @"__auto.blk.23.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18127 = "torch_c.from_builtin_tensor"(%18126) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18128 = "util.global.load"() <{global = @__auto.blk.23.attn_scale}> : () -> tensor<f32>
%18129 = "torch_c.from_builtin_tensor"(%18128) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18130 = "util.global.load"() <{global = @"__auto.blk.23.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18131 = "torch_c.from_builtin_tensor"(%18130) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18132 = "util.global.load"() <{global = @"__auto.blk.23.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18133 = "torch_c.from_builtin_tensor"(%18132) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18134 = "util.global.load"() <{global = @__auto.blk.23.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18135 = "torch_c.from_builtin_tensor"(%18134) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18136 = "util.global.load"() <{global = @"__auto.blk.23.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18137 = "torch_c.from_builtin_tensor"(%18136) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18138 = "util.global.load"() <{global = @"__auto.blk.23.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18139 = "torch_c.from_builtin_tensor"(%18138) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18140 = "util.global.load"() <{global = @"__auto.blk.23.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18141 = "torch_c.from_builtin_tensor"(%18140) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18142 = "util.global.load"() <{global = @"__auto.blk.23.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18143 = "torch_c.from_builtin_tensor"(%18142) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18144 = "util.global.load"() <{global = @"__auto.blk.23.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18145 = "torch_c.from_builtin_tensor"(%18144) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18146 = "util.global.load"() <{global = @"__auto.blk.23.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18147 = "torch_c.from_builtin_tensor"(%18146) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18148 = "util.global.load"() <{global = @__auto.blk.24.attn_norm.weight}> : () -> tensor<4096xbf16>
%18149 = "torch_c.from_builtin_tensor"(%18148) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18150 = "util.global.load"() <{global = @"__auto.blk.24.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18151 = "torch_c.from_builtin_tensor"(%18150) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18152 = "util.global.load"() <{global = @"__auto.blk.24.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18153 = "torch_c.from_builtin_tensor"(%18152) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18154 = "util.global.load"() <{global = @"__auto.blk.24.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18155 = "torch_c.from_builtin_tensor"(%18154) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18156 = "util.global.load"() <{global = @"__auto.blk.24.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18157 = "torch_c.from_builtin_tensor"(%18156) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18158 = "util.global.load"() <{global = @"__auto.blk.24.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18159 = "torch_c.from_builtin_tensor"(%18158) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18160 = "util.global.load"() <{global = @"__auto.blk.24.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18161 = "torch_c.from_builtin_tensor"(%18160) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18162 = "util.global.load"() <{global = @"__auto.blk.24.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18163 = "torch_c.from_builtin_tensor"(%18162) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18164 = "util.global.load"() <{global = @"__auto.blk.24.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18165 = "torch_c.from_builtin_tensor"(%18164) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18166 = "util.global.load"() <{global = @"__auto.blk.24.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18167 = "torch_c.from_builtin_tensor"(%18166) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18168 = "util.global.load"() <{global = @__auto.blk.24.attn_scale}> : () -> tensor<f32>
%18169 = "torch_c.from_builtin_tensor"(%18168) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18170 = "util.global.load"() <{global = @"__auto.blk.24.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18171 = "torch_c.from_builtin_tensor"(%18170) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18172 = "util.global.load"() <{global = @"__auto.blk.24.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18173 = "torch_c.from_builtin_tensor"(%18172) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18174 = "util.global.load"() <{global = @__auto.blk.24.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18175 = "torch_c.from_builtin_tensor"(%18174) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18176 = "util.global.load"() <{global = @"__auto.blk.24.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18177 = "torch_c.from_builtin_tensor"(%18176) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18178 = "util.global.load"() <{global = @"__auto.blk.24.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18179 = "torch_c.from_builtin_tensor"(%18178) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18180 = "util.global.load"() <{global = @"__auto.blk.24.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18181 = "torch_c.from_builtin_tensor"(%18180) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18182 = "util.global.load"() <{global = @"__auto.blk.24.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18183 = "torch_c.from_builtin_tensor"(%18182) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18184 = "util.global.load"() <{global = @"__auto.blk.24.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18185 = "torch_c.from_builtin_tensor"(%18184) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18186 = "util.global.load"() <{global = @"__auto.blk.24.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18187 = "torch_c.from_builtin_tensor"(%18186) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18188 = "util.global.load"() <{global = @__auto.blk.25.attn_norm.weight}> : () -> tensor<4096xbf16>
%18189 = "torch_c.from_builtin_tensor"(%18188) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18190 = "util.global.load"() <{global = @"__auto.blk.25.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18191 = "torch_c.from_builtin_tensor"(%18190) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18192 = "util.global.load"() <{global = @"__auto.blk.25.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18193 = "torch_c.from_builtin_tensor"(%18192) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18194 = "util.global.load"() <{global = @"__auto.blk.25.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18195 = "torch_c.from_builtin_tensor"(%18194) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18196 = "util.global.load"() <{global = @"__auto.blk.25.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18197 = "torch_c.from_builtin_tensor"(%18196) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18198 = "util.global.load"() <{global = @"__auto.blk.25.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18199 = "torch_c.from_builtin_tensor"(%18198) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18200 = "util.global.load"() <{global = @"__auto.blk.25.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18201 = "torch_c.from_builtin_tensor"(%18200) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18202 = "util.global.load"() <{global = @"__auto.blk.25.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18203 = "torch_c.from_builtin_tensor"(%18202) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18204 = "util.global.load"() <{global = @"__auto.blk.25.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18205 = "torch_c.from_builtin_tensor"(%18204) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18206 = "util.global.load"() <{global = @"__auto.blk.25.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18207 = "torch_c.from_builtin_tensor"(%18206) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18208 = "util.global.load"() <{global = @__auto.blk.25.attn_scale}> : () -> tensor<f32>
%18209 = "torch_c.from_builtin_tensor"(%18208) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18210 = "util.global.load"() <{global = @"__auto.blk.25.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18211 = "torch_c.from_builtin_tensor"(%18210) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18212 = "util.global.load"() <{global = @"__auto.blk.25.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18213 = "torch_c.from_builtin_tensor"(%18212) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18214 = "util.global.load"() <{global = @__auto.blk.25.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18215 = "torch_c.from_builtin_tensor"(%18214) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18216 = "util.global.load"() <{global = @"__auto.blk.25.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18217 = "torch_c.from_builtin_tensor"(%18216) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18218 = "util.global.load"() <{global = @"__auto.blk.25.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18219 = "torch_c.from_builtin_tensor"(%18218) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18220 = "util.global.load"() <{global = @"__auto.blk.25.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18221 = "torch_c.from_builtin_tensor"(%18220) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18222 = "util.global.load"() <{global = @"__auto.blk.25.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18223 = "torch_c.from_builtin_tensor"(%18222) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18224 = "util.global.load"() <{global = @"__auto.blk.25.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18225 = "torch_c.from_builtin_tensor"(%18224) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18226 = "util.global.load"() <{global = @"__auto.blk.25.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18227 = "torch_c.from_builtin_tensor"(%18226) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18228 = "util.global.load"() <{global = @__auto.blk.26.attn_norm.weight}> : () -> tensor<4096xbf16>
%18229 = "torch_c.from_builtin_tensor"(%18228) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18230 = "util.global.load"() <{global = @"__auto.blk.26.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18231 = "torch_c.from_builtin_tensor"(%18230) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18232 = "util.global.load"() <{global = @"__auto.blk.26.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18233 = "torch_c.from_builtin_tensor"(%18232) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18234 = "util.global.load"() <{global = @"__auto.blk.26.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18235 = "torch_c.from_builtin_tensor"(%18234) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18236 = "util.global.load"() <{global = @"__auto.blk.26.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18237 = "torch_c.from_builtin_tensor"(%18236) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18238 = "util.global.load"() <{global = @"__auto.blk.26.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18239 = "torch_c.from_builtin_tensor"(%18238) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18240 = "util.global.load"() <{global = @"__auto.blk.26.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18241 = "torch_c.from_builtin_tensor"(%18240) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18242 = "util.global.load"() <{global = @"__auto.blk.26.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18243 = "torch_c.from_builtin_tensor"(%18242) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18244 = "util.global.load"() <{global = @"__auto.blk.26.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18245 = "torch_c.from_builtin_tensor"(%18244) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18246 = "util.global.load"() <{global = @"__auto.blk.26.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18247 = "torch_c.from_builtin_tensor"(%18246) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18248 = "util.global.load"() <{global = @__auto.blk.26.attn_scale}> : () -> tensor<f32>
%18249 = "torch_c.from_builtin_tensor"(%18248) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18250 = "util.global.load"() <{global = @"__auto.blk.26.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18251 = "torch_c.from_builtin_tensor"(%18250) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18252 = "util.global.load"() <{global = @"__auto.blk.26.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18253 = "torch_c.from_builtin_tensor"(%18252) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18254 = "util.global.load"() <{global = @__auto.blk.26.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18255 = "torch_c.from_builtin_tensor"(%18254) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18256 = "util.global.load"() <{global = @"__auto.blk.26.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18257 = "torch_c.from_builtin_tensor"(%18256) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18258 = "util.global.load"() <{global = @"__auto.blk.26.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18259 = "torch_c.from_builtin_tensor"(%18258) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18260 = "util.global.load"() <{global = @"__auto.blk.26.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18261 = "torch_c.from_builtin_tensor"(%18260) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18262 = "util.global.load"() <{global = @"__auto.blk.26.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18263 = "torch_c.from_builtin_tensor"(%18262) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18264 = "util.global.load"() <{global = @"__auto.blk.26.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18265 = "torch_c.from_builtin_tensor"(%18264) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18266 = "util.global.load"() <{global = @"__auto.blk.26.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18267 = "torch_c.from_builtin_tensor"(%18266) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18268 = "util.global.load"() <{global = @__auto.blk.27.attn_norm.weight}> : () -> tensor<4096xbf16>
%18269 = "torch_c.from_builtin_tensor"(%18268) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18270 = "util.global.load"() <{global = @"__auto.blk.27.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18271 = "torch_c.from_builtin_tensor"(%18270) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18272 = "util.global.load"() <{global = @"__auto.blk.27.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18273 = "torch_c.from_builtin_tensor"(%18272) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18274 = "util.global.load"() <{global = @"__auto.blk.27.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18275 = "torch_c.from_builtin_tensor"(%18274) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18276 = "util.global.load"() <{global = @"__auto.blk.27.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18277 = "torch_c.from_builtin_tensor"(%18276) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18278 = "util.global.load"() <{global = @"__auto.blk.27.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18279 = "torch_c.from_builtin_tensor"(%18278) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18280 = "util.global.load"() <{global = @"__auto.blk.27.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18281 = "torch_c.from_builtin_tensor"(%18280) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18282 = "util.global.load"() <{global = @"__auto.blk.27.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18283 = "torch_c.from_builtin_tensor"(%18282) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18284 = "util.global.load"() <{global = @"__auto.blk.27.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18285 = "torch_c.from_builtin_tensor"(%18284) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18286 = "util.global.load"() <{global = @"__auto.blk.27.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18287 = "torch_c.from_builtin_tensor"(%18286) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18288 = "util.global.load"() <{global = @__auto.blk.27.attn_scale}> : () -> tensor<f32>
%18289 = "torch_c.from_builtin_tensor"(%18288) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18290 = "util.global.load"() <{global = @"__auto.blk.27.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18291 = "torch_c.from_builtin_tensor"(%18290) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18292 = "util.global.load"() <{global = @"__auto.blk.27.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18293 = "torch_c.from_builtin_tensor"(%18292) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18294 = "util.global.load"() <{global = @__auto.blk.27.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18295 = "torch_c.from_builtin_tensor"(%18294) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18296 = "util.global.load"() <{global = @"__auto.blk.27.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18297 = "torch_c.from_builtin_tensor"(%18296) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18298 = "util.global.load"() <{global = @"__auto.blk.27.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18299 = "torch_c.from_builtin_tensor"(%18298) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18300 = "util.global.load"() <{global = @"__auto.blk.27.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18301 = "torch_c.from_builtin_tensor"(%18300) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18302 = "util.global.load"() <{global = @"__auto.blk.27.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18303 = "torch_c.from_builtin_tensor"(%18302) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18304 = "util.global.load"() <{global = @"__auto.blk.27.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18305 = "torch_c.from_builtin_tensor"(%18304) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18306 = "util.global.load"() <{global = @"__auto.blk.27.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18307 = "torch_c.from_builtin_tensor"(%18306) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18308 = "util.global.load"() <{global = @__auto.blk.28.attn_norm.weight}> : () -> tensor<4096xbf16>
%18309 = "torch_c.from_builtin_tensor"(%18308) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18310 = "util.global.load"() <{global = @"__auto.blk.28.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18311 = "torch_c.from_builtin_tensor"(%18310) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18312 = "util.global.load"() <{global = @"__auto.blk.28.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18313 = "torch_c.from_builtin_tensor"(%18312) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18314 = "util.global.load"() <{global = @"__auto.blk.28.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18315 = "torch_c.from_builtin_tensor"(%18314) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18316 = "util.global.load"() <{global = @"__auto.blk.28.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18317 = "torch_c.from_builtin_tensor"(%18316) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18318 = "util.global.load"() <{global = @"__auto.blk.28.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18319 = "torch_c.from_builtin_tensor"(%18318) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18320 = "util.global.load"() <{global = @"__auto.blk.28.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18321 = "torch_c.from_builtin_tensor"(%18320) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18322 = "util.global.load"() <{global = @"__auto.blk.28.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18323 = "torch_c.from_builtin_tensor"(%18322) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18324 = "util.global.load"() <{global = @"__auto.blk.28.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18325 = "torch_c.from_builtin_tensor"(%18324) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18326 = "util.global.load"() <{global = @"__auto.blk.28.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18327 = "torch_c.from_builtin_tensor"(%18326) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18328 = "util.global.load"() <{global = @__auto.blk.28.attn_scale}> : () -> tensor<f32>
%18329 = "torch_c.from_builtin_tensor"(%18328) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18330 = "util.global.load"() <{global = @"__auto.blk.28.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18331 = "torch_c.from_builtin_tensor"(%18330) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18332 = "util.global.load"() <{global = @"__auto.blk.28.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18333 = "torch_c.from_builtin_tensor"(%18332) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18334 = "util.global.load"() <{global = @__auto.blk.28.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18335 = "torch_c.from_builtin_tensor"(%18334) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18336 = "util.global.load"() <{global = @"__auto.blk.28.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18337 = "torch_c.from_builtin_tensor"(%18336) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18338 = "util.global.load"() <{global = @"__auto.blk.28.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18339 = "torch_c.from_builtin_tensor"(%18338) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18340 = "util.global.load"() <{global = @"__auto.blk.28.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18341 = "torch_c.from_builtin_tensor"(%18340) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18342 = "util.global.load"() <{global = @"__auto.blk.28.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18343 = "torch_c.from_builtin_tensor"(%18342) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18344 = "util.global.load"() <{global = @"__auto.blk.28.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18345 = "torch_c.from_builtin_tensor"(%18344) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18346 = "util.global.load"() <{global = @"__auto.blk.28.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18347 = "torch_c.from_builtin_tensor"(%18346) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18348 = "util.global.load"() <{global = @__auto.blk.29.attn_norm.weight}> : () -> tensor<4096xbf16>
%18349 = "torch_c.from_builtin_tensor"(%18348) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18350 = "util.global.load"() <{global = @"__auto.blk.29.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18351 = "torch_c.from_builtin_tensor"(%18350) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18352 = "util.global.load"() <{global = @"__auto.blk.29.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18353 = "torch_c.from_builtin_tensor"(%18352) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18354 = "util.global.load"() <{global = @"__auto.blk.29.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18355 = "torch_c.from_builtin_tensor"(%18354) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18356 = "util.global.load"() <{global = @"__auto.blk.29.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18357 = "torch_c.from_builtin_tensor"(%18356) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18358 = "util.global.load"() <{global = @"__auto.blk.29.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18359 = "torch_c.from_builtin_tensor"(%18358) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18360 = "util.global.load"() <{global = @"__auto.blk.29.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18361 = "torch_c.from_builtin_tensor"(%18360) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18362 = "util.global.load"() <{global = @"__auto.blk.29.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18363 = "torch_c.from_builtin_tensor"(%18362) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18364 = "util.global.load"() <{global = @"__auto.blk.29.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18365 = "torch_c.from_builtin_tensor"(%18364) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18366 = "util.global.load"() <{global = @"__auto.blk.29.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18367 = "torch_c.from_builtin_tensor"(%18366) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18368 = "util.global.load"() <{global = @__auto.blk.29.attn_scale}> : () -> tensor<f32>
%18369 = "torch_c.from_builtin_tensor"(%18368) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18370 = "util.global.load"() <{global = @"__auto.blk.29.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18371 = "torch_c.from_builtin_tensor"(%18370) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18372 = "util.global.load"() <{global = @"__auto.blk.29.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18373 = "torch_c.from_builtin_tensor"(%18372) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18374 = "util.global.load"() <{global = @__auto.blk.29.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18375 = "torch_c.from_builtin_tensor"(%18374) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18376 = "util.global.load"() <{global = @"__auto.blk.29.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18377 = "torch_c.from_builtin_tensor"(%18376) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18378 = "util.global.load"() <{global = @"__auto.blk.29.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18379 = "torch_c.from_builtin_tensor"(%18378) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18380 = "util.global.load"() <{global = @"__auto.blk.29.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18381 = "torch_c.from_builtin_tensor"(%18380) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18382 = "util.global.load"() <{global = @"__auto.blk.29.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18383 = "torch_c.from_builtin_tensor"(%18382) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18384 = "util.global.load"() <{global = @"__auto.blk.29.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18385 = "torch_c.from_builtin_tensor"(%18384) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18386 = "util.global.load"() <{global = @"__auto.blk.29.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18387 = "torch_c.from_builtin_tensor"(%18386) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18388 = "util.global.load"() <{global = @__auto.blk.30.attn_norm.weight}> : () -> tensor<4096xbf16>
%18389 = "torch_c.from_builtin_tensor"(%18388) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18390 = "util.global.load"() <{global = @"__auto.blk.30.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18391 = "torch_c.from_builtin_tensor"(%18390) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18392 = "util.global.load"() <{global = @"__auto.blk.30.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18393 = "torch_c.from_builtin_tensor"(%18392) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18394 = "util.global.load"() <{global = @"__auto.blk.30.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18395 = "torch_c.from_builtin_tensor"(%18394) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18396 = "util.global.load"() <{global = @"__auto.blk.30.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18397 = "torch_c.from_builtin_tensor"(%18396) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18398 = "util.global.load"() <{global = @"__auto.blk.30.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18399 = "torch_c.from_builtin_tensor"(%18398) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18400 = "util.global.load"() <{global = @"__auto.blk.30.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18401 = "torch_c.from_builtin_tensor"(%18400) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18402 = "util.global.load"() <{global = @"__auto.blk.30.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18403 = "torch_c.from_builtin_tensor"(%18402) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18404 = "util.global.load"() <{global = @"__auto.blk.30.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18405 = "torch_c.from_builtin_tensor"(%18404) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18406 = "util.global.load"() <{global = @"__auto.blk.30.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18407 = "torch_c.from_builtin_tensor"(%18406) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18408 = "util.global.load"() <{global = @__auto.blk.30.attn_scale}> : () -> tensor<f32>
%18409 = "torch_c.from_builtin_tensor"(%18408) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18410 = "util.global.load"() <{global = @"__auto.blk.30.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18411 = "torch_c.from_builtin_tensor"(%18410) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18412 = "util.global.load"() <{global = @"__auto.blk.30.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18413 = "torch_c.from_builtin_tensor"(%18412) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18414 = "util.global.load"() <{global = @__auto.blk.30.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18415 = "torch_c.from_builtin_tensor"(%18414) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18416 = "util.global.load"() <{global = @"__auto.blk.30.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18417 = "torch_c.from_builtin_tensor"(%18416) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18418 = "util.global.load"() <{global = @"__auto.blk.30.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18419 = "torch_c.from_builtin_tensor"(%18418) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18420 = "util.global.load"() <{global = @"__auto.blk.30.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18421 = "torch_c.from_builtin_tensor"(%18420) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18422 = "util.global.load"() <{global = @"__auto.blk.30.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18423 = "torch_c.from_builtin_tensor"(%18422) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18424 = "util.global.load"() <{global = @"__auto.blk.30.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18425 = "torch_c.from_builtin_tensor"(%18424) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18426 = "util.global.load"() <{global = @"__auto.blk.30.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18427 = "torch_c.from_builtin_tensor"(%18426) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18428 = "util.global.load"() <{global = @__auto.blk.31.attn_norm.weight}> : () -> tensor<4096xbf16>
%18429 = "torch_c.from_builtin_tensor"(%18428) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18430 = "util.global.load"() <{global = @"__auto.blk.31.attn_q.q_input:rscale"}> : () -> tensor<f32>
%18431 = "torch_c.from_builtin_tensor"(%18430) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18432 = "util.global.load"() <{global = @"__auto.blk.31.attn_q.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18433 = "torch_c.from_builtin_tensor"(%18432) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18434 = "util.global.load"() <{global = @"__auto.blk.31.attn_q.q_output:rscale"}> : () -> tensor<f32>
%18435 = "torch_c.from_builtin_tensor"(%18434) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18436 = "util.global.load"() <{global = @"__auto.blk.31.attn_k.q_input:rscale"}> : () -> tensor<f32>
%18437 = "torch_c.from_builtin_tensor"(%18436) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18438 = "util.global.load"() <{global = @"__auto.blk.31.attn_k.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18439 = "torch_c.from_builtin_tensor"(%18438) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18440 = "util.global.load"() <{global = @"__auto.blk.31.attn_k.q_output:rscale"}> : () -> tensor<f32>
%18441 = "torch_c.from_builtin_tensor"(%18440) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18442 = "util.global.load"() <{global = @"__auto.blk.31.attn_v.q_input:rscale"}> : () -> tensor<f32>
%18443 = "torch_c.from_builtin_tensor"(%18442) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18444 = "util.global.load"() <{global = @"__auto.blk.31.attn_v.weight:qs"}> : () -> tensor<1024x4096xf8E4M3FNUZ>
%18445 = "torch_c.from_builtin_tensor"(%18444) : (tensor<1024x4096xf8E4M3FNUZ>) -> !torch.vtensor<[1024,4096],f8E4M3FNUZ>
%18446 = "util.global.load"() <{global = @"__auto.blk.31.attn_v.q_output:rscale"}> : () -> tensor<f32>
%18447 = "torch_c.from_builtin_tensor"(%18446) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18448 = "util.global.load"() <{global = @__auto.blk.31.attn_scale}> : () -> tensor<f32>
%18449 = "torch_c.from_builtin_tensor"(%18448) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18450 = "util.global.load"() <{global = @"__auto.blk.31.attn_output.q_input:rscale"}> : () -> tensor<f32>
%18451 = "torch_c.from_builtin_tensor"(%18450) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18452 = "util.global.load"() <{global = @"__auto.blk.31.attn_output.weight:qs"}> : () -> tensor<4096x4096xf8E4M3FNUZ>
%18453 = "torch_c.from_builtin_tensor"(%18452) : (tensor<4096x4096xf8E4M3FNUZ>) -> !torch.vtensor<[4096,4096],f8E4M3FNUZ>
%18454 = "util.global.load"() <{global = @__auto.blk.31.ffn_norm.weight}> : () -> tensor<4096xbf16>
%18455 = "torch_c.from_builtin_tensor"(%18454) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18456 = "util.global.load"() <{global = @"__auto.blk.31.ffn_gate.q_input:rscale"}> : () -> tensor<f32>
%18457 = "torch_c.from_builtin_tensor"(%18456) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18458 = "util.global.load"() <{global = @"__auto.blk.31.ffn_gate.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18459 = "torch_c.from_builtin_tensor"(%18458) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18460 = "util.global.load"() <{global = @"__auto.blk.31.ffn_up.q_input:rscale"}> : () -> tensor<f32>
%18461 = "torch_c.from_builtin_tensor"(%18460) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18462 = "util.global.load"() <{global = @"__auto.blk.31.ffn_up.weight:qs"}> : () -> tensor<14336x4096xf8E4M3FNUZ>
%18463 = "torch_c.from_builtin_tensor"(%18462) : (tensor<14336x4096xf8E4M3FNUZ>) -> !torch.vtensor<[14336,4096],f8E4M3FNUZ>
%18464 = "util.global.load"() <{global = @"__auto.blk.31.ffn_down.q_input:rscale"}> : () -> tensor<f32>
%18465 = "torch_c.from_builtin_tensor"(%18464) : (tensor<f32>) -> !torch.vtensor<[],f32>
%18466 = "util.global.load"() <{global = @"__auto.blk.31.ffn_down.weight:qs"}> : () -> tensor<4096x14336xf8E4M3FNUZ>
%18467 = "torch_c.from_builtin_tensor"(%18466) : (tensor<4096x14336xf8E4M3FNUZ>) -> !torch.vtensor<[4096,14336],f8E4M3FNUZ>
%18468 = "util.global.load"() <{global = @__auto.output_norm.weight}> : () -> tensor<4096xbf16>
%18469 = "torch_c.from_builtin_tensor"(%18468) : (tensor<4096xbf16>) -> !torch.vtensor<[4096],bf16>
%18470 = "util.global.load"() <{global = @__auto.output.weight}> : () -> tensor<128256x4096xbf16>
%18471 = "torch_c.from_builtin_tensor"(%18470) : (tensor<128256x4096xbf16>) -> !torch.vtensor<[128256,4096],bf16>
%18472 = "torch.copy.to_vtensor"(%arg70) : (!torch.tensor<[?,2097152],f8E4M3FNUZ>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
%18473 = "torch.symbolic_int"() <{max_val = 131040 : i64, min_val = 64 : i64, symbol_name = "32*s1"}> : () -> !torch.int
%18474 = "torch.symbolic_int"() <{max_val = 4095 : i64, min_val = 2 : i64, symbol_name = "s1"}> : () -> !torch.int
%18475 = "torch.symbolic_int"() <{max_val = 9223372036854775807 : i64, min_val = 0 : i64, symbol_name = "s2"}> : () -> !torch.int
"torch.bind_symbolic_shape"(%arg67, %18474) <{shape_expressions = #map}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
"torch.bind_symbolic_shape"(%arg69, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
"torch.bind_symbolic_shape"(%18472, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%18476 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18477 = "torch.aten.size.int"(%arg69, %18476) : (!torch.vtensor<[4,?],si64>, !torch.int) -> !torch.int
%18478 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18479 = "torch.aten.size.int"(%18472, %18478) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> !torch.int
%18480 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18481 = "torch.aten.size.int"(%arg67, %18480) : (!torch.vtensor<[4,?],si64>, !torch.int) -> !torch.int
%18482 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18483 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18484 = "torch.constant.none"() : () -> !torch.none
%18485 = "torch.constant.none"() : () -> !torch.none
%18486 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%18487 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18488 = "torch.aten.arange.start_step"(%18482, %18481, %18483, %18484, %18485, %18486, %18487) : (!torch.int, !torch.int, !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%18488, %18474) <{shape_expressions = #map3}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%18489 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%18490 = "torch.aten.unsqueeze"(%arg68, %18489) : (!torch.vtensor<[4],si64>, !torch.int) -> !torch.vtensor<[4,1],si64>
%18491 = "torch.aten.ge.Tensor"(%18488, %18490) : (!torch.vtensor<[?],si64>, !torch.vtensor<[4,1],si64>) -> !torch.vtensor<[4,?],i1>
"torch.bind_symbolic_shape"(%18491, %18474) <{shape_expressions = #map}> : (!torch.vtensor<[4,?],i1>, !torch.int) -> ()
%18492 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18493 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18494 = "torch.prim.ListConstruct"(%18492, %18493) : (!torch.int, !torch.int) -> !torch.list<int>
%18495 = "torch.constant.int"() <{value = 11 : i64}> : () -> !torch.int
%18496 = "torch.constant.none"() : () -> !torch.none
%18497 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%18498 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18499 = "torch.aten.ones"(%18494, %18495, %18496, %18497, %18498) : (!torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[1,1],i1>
%18500 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%18501 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%18502 = "torch.prim.ListConstruct"(%18500, %18501) : (!torch.int, !torch.int) -> !torch.list<int>
%18503 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18504 = "torch.aten.expand"(%18499, %18502, %18503) : (!torch.vtensor<[1,1],i1>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[131072,131072],i1>
%18505 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18506 = "torch.aten.triu"(%18504, %18505) : (!torch.vtensor<[131072,131072],i1>, !torch.int) -> !torch.vtensor<[131072,131072],i1>
%18507 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18508 = "torch.aten.unsqueeze"(%18506, %18507) : (!torch.vtensor<[131072,131072],i1>, !torch.int) -> !torch.vtensor<[1,131072,131072],i1>
%18509 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18510 = "torch.aten.unsqueeze"(%18508, %18509) : (!torch.vtensor<[1,131072,131072],i1>, !torch.int) -> !torch.vtensor<[1,1,131072,131072],i1>
%18511 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18512 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18513 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18514 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18515 = "torch.aten.slice.Tensor"(%18510, %18511, %18512, %18513, %18514) : (!torch.vtensor<[1,1,131072,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,1,131072,131072],i1>
%18516 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18517 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18518 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18519 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18520 = "torch.aten.slice.Tensor"(%18515, %18516, %18517, %18518, %18519) : (!torch.vtensor<[1,1,131072,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,1,131072,131072],i1>
%18521 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18522 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18523 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18524 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18525 = "torch.aten.slice.Tensor"(%18520, %18521, %18522, %18523, %18524) : (!torch.vtensor<[1,1,131072,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,1,131072,131072],i1>
%18526 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18527 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18528 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18529 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18530 = "torch.aten.slice.Tensor"(%18525, %18526, %18527, %18528, %18529) : (!torch.vtensor<[1,1,131072,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,1,131072,131072],i1>
%18531 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18532 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18533 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18534 = "torch.aten.slice.Tensor"(%18530, %18531, %18532, %18481, %18533) : (!torch.vtensor<[1,1,131072,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,1,?,131072],i1>
"torch.bind_symbolic_shape"(%18534, %18474) <{shape_expressions = #map4}> : (!torch.vtensor<[1,1,?,131072],i1>, !torch.int) -> ()
%18535 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18536 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18537 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18538 = "torch.aten.slice.Tensor"(%18534, %18535, %18536, %18481, %18537) : (!torch.vtensor<[1,1,?,131072],i1>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,1,?,?],i1>
"torch.bind_symbolic_shape"(%18538, %18474) <{shape_expressions = #map5}> : (!torch.vtensor<[1,1,?,?],i1>, !torch.int) -> ()
%18539 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18540 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18541 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18542 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18543 = "torch.aten.slice.Tensor"(%18491, %18539, %18540, %18541, %18542) : (!torch.vtensor<[4,?],i1>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?],i1>
"torch.bind_symbolic_shape"(%18543, %18474) <{shape_expressions = #map}> : (!torch.vtensor<[4,?],i1>, !torch.int) -> ()
%18544 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18545 = "torch.aten.unsqueeze"(%18543, %18544) : (!torch.vtensor<[4,?],i1>, !torch.int) -> !torch.vtensor<[4,1,?],i1>
"torch.bind_symbolic_shape"(%18545, %18474) <{shape_expressions = #map6}> : (!torch.vtensor<[4,1,?],i1>, !torch.int) -> ()
%18546 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18547 = "torch.aten.unsqueeze"(%18545, %18546) : (!torch.vtensor<[4,1,?],i1>, !torch.int) -> !torch.vtensor<[4,1,1,?],i1>
"torch.bind_symbolic_shape"(%18547, %18474) <{shape_expressions = #map7}> : (!torch.vtensor<[4,1,1,?],i1>, !torch.int) -> ()
%18548 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18549 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18550 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18551 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18552 = "torch.aten.slice.Tensor"(%18547, %18548, %18549, %18550, %18551) : (!torch.vtensor<[4,1,1,?],i1>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,1,1,?],i1>
"torch.bind_symbolic_shape"(%18552, %18474) <{shape_expressions = #map7}> : (!torch.vtensor<[4,1,1,?],i1>, !torch.int) -> ()
%18553 = "torch.aten.logical_or"(%18538, %18552) : (!torch.vtensor<[1,1,?,?],i1>, !torch.vtensor<[4,1,1,?],i1>) -> !torch.vtensor<[4,1,?,?],i1>
"torch.bind_symbolic_shape"(%18553, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],i1>, !torch.int) -> ()
%18554 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18555 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%18556 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18557 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%18558 = "torch.constant.none"() : () -> !torch.none
%18559 = "torch.aten.scalar_tensor"(%18554, %18555, %18556, %18557, %18558) : (!torch.int, !torch.int, !torch.int, !torch.Device, !torch.none) -> !torch.vtensor<[],f32>
%18560 = "torch.constant.float"() <{value = 0xFFF0000000000000 : f64}> : () -> !torch.float
%18561 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%18562 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18563 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%18564 = "torch.constant.none"() : () -> !torch.none
%18565 = "torch.aten.scalar_tensor"(%18560, %18561, %18562, %18563, %18564) : (!torch.float, !torch.int, !torch.int, !torch.Device, !torch.none) -> !torch.vtensor<[],f32>
%18566 = "torch.aten.where.self"(%18553, %18565, %18559) : (!torch.vtensor<[4,1,?,?],i1>, !torch.vtensor<[],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,1,?,?],f32>
"torch.bind_symbolic_shape"(%18566, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f32>, !torch.int) -> ()
%18567 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%18568 = "torch.prims.convert_element_type"(%18566, %18567) : (!torch.vtensor<[4,1,?,?],f32>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18568, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%18569 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%18570 = "torch.prims.convert_element_type"(%18568, %18569) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18570, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%18571 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%18572 = "torch.prims.convert_element_type"(%17187, %18571) : (!torch.vtensor<[128256,4096],bf16>, !torch.int) -> !torch.vtensor<[128256,4096],bf16>
%18573 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%18574 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18575 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18576 = "torch.aten.embedding"(%18572, %arg67, %18573, %18574, %18575) : (!torch.vtensor<[128256,4096],bf16>, !torch.vtensor<[4,?],si64>, !torch.int, !torch.bool, !torch.bool) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%18576, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%18577 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%18578 = "torch.prims.convert_element_type"(%18576, %18577) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%18578, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%18579 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18580 = "torch.aten.pow.Tensor_Scalar"(%18578, %18579) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%18580, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%18581 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%18582 = "torch.prim.ListConstruct"(%18581) : (!torch.int) -> !torch.list<int>
%18583 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%18584 = "torch.constant.none"() : () -> !torch.none
%18585 = "torch.aten.mean.dim"(%18580, %18582, %18583, %18584) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%18585, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%18586 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%18587 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18588 = "torch.aten.add.Scalar"(%18585, %18586, %18587) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%18588, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%18589 = "torch.aten.rsqrt"(%18588) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%18589, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%18590 = "torch.aten.mul.Tensor"(%18578, %18589) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%18590, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%18591 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%18592 = "torch.prims.convert_element_type"(%18590, %18591) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%18592, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%18593 = "torch.aten.mul.Tensor"(%17189, %18592) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%18593, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%18594 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%18595 = "torch.prims.convert_element_type"(%18593, %18594) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%18595, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%18596 = "torch.aten.div.Tensor"(%18595, %17191) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%18596, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%18597 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%18598 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%18599 = "torch.aten.clamp"(%18596, %18597, %18598) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%18599, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%18600 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%18601 = "torch.prims.convert_element_type"(%18599, %18600) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18601, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%18602 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18603 = "torch.aten.unsqueeze"(%17193, %18602) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%18604 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18605 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%18606 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%18607 = "torch.prim.ListConstruct"(%18604, %18605, %18606) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18608 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18609 = "torch.aten.expand"(%18603, %18607, %18608) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%18610 = "torch_c.to_builtin_tensor"(%18601) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%18611 = "torch_c.to_builtin_tensor"(%18609) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%18612 = "util.call"(%18610, %18611) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%18613 = "torch_c.from_builtin_tensor"(%18612) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%18613, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%18614 = "torch.aten.div.Tensor"(%18613, %17195) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%18614, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%18615 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%18616 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%18617 = "torch.aten.clamp"(%18614, %18615, %18616) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%18617, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%18618 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%18619 = "torch.prims.convert_element_type"(%18617, %18618) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18619, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%18620 = "torch.aten.div.Tensor"(%18595, %17197) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%18620, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%18621 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%18622 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%18623 = "torch.aten.clamp"(%18620, %18621, %18622) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%18623, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%18624 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%18625 = "torch.prims.convert_element_type"(%18623, %18624) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18625, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%18626 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18627 = "torch.aten.unsqueeze"(%17199, %18626) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%18628 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18629 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%18630 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%18631 = "torch.prim.ListConstruct"(%18628, %18629, %18630) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18632 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18633 = "torch.aten.expand"(%18627, %18631, %18632) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%18634 = "torch_c.to_builtin_tensor"(%18625) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%18635 = "torch_c.to_builtin_tensor"(%18633) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%18636 = "util.call"(%18634, %18635) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%18637 = "torch_c.from_builtin_tensor"(%18636) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%18637, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%18638 = "torch.aten.div.Tensor"(%18637, %17201) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%18638, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%18639 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%18640 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%18641 = "torch.aten.clamp"(%18638, %18639, %18640) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%18641, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%18642 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%18643 = "torch.prims.convert_element_type"(%18641, %18642) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18643, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%18644 = "torch.aten.div.Tensor"(%18595, %17203) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%18644, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%18645 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%18646 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%18647 = "torch.aten.clamp"(%18644, %18645, %18646) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%18647, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%18648 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%18649 = "torch.prims.convert_element_type"(%18647, %18648) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18649, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%18650 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18651 = "torch.aten.unsqueeze"(%17205, %18650) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%18652 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18653 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%18654 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%18655 = "torch.prim.ListConstruct"(%18652, %18653, %18654) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18656 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18657 = "torch.aten.expand"(%18651, %18655, %18656) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%18658 = "torch_c.to_builtin_tensor"(%18649) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%18659 = "torch_c.to_builtin_tensor"(%18657) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%18660 = "util.call"(%18658, %18659) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%18661 = "torch_c.from_builtin_tensor"(%18660) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%18661, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%18662 = "torch.aten.div.Tensor"(%18661, %17207) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%18662, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%18663 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%18664 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%18665 = "torch.aten.clamp"(%18662, %18663, %18664) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%18665, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%18666 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%18667 = "torch.prims.convert_element_type"(%18665, %18666) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18667, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%18668 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18669 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%18670 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%18671 = "torch.prim.ListConstruct"(%18668, %18481, %18669, %18670) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18672 = "torch.aten.view"(%18619, %18671) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18672, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%18673 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18674 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%18675 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%18676 = "torch.prim.ListConstruct"(%18673, %18481, %18674, %18675) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18677 = "torch.aten.view"(%18643, %18676) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18677, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%18678 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18679 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%18680 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%18681 = "torch.prim.ListConstruct"(%18678, %18481, %18679, %18680) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18682 = "torch.aten.view"(%18667, %18681) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18682, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%18683 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%18684 = "torch.constant.none"() : () -> !torch.none
%18685 = "torch.constant.none"() : () -> !torch.none
%18686 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%18687 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18688 = "torch.aten.arange"(%18683, %18684, %18685, %18686, %18687) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%18689 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18690 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%18691 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18692 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18693 = "torch.constant.none"() : () -> !torch.none
%18694 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%18695 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18696 = "torch.aten.arange.start_step"(%18689, %18690, %18691, %18692, %18693, %18694, %18695) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%18697 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%18698 = "torch.prims.convert_element_type"(%18696, %18697) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%18699 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%18700 = "torch.aten.div.Scalar"(%18698, %18699) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18701 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%18702 = "torch.aten.pow.Scalar"(%18701, %18700) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18703 = "torch.aten.reciprocal"(%18702) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18704 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%18705 = "torch.aten.mul.Scalar"(%18703, %18704) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%18706 = "torch.aten.reciprocal"(%18705) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18707 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%18708 = "torch.aten.mul.Scalar"(%18706, %18707) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%18709 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%18710 = "torch.aten.gt.Scalar"(%18708, %18709) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%18711 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%18712 = "torch.aten.div.Scalar"(%18705, %18711) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18713 = "torch.aten.where.self"(%18710, %18712, %18705) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18714 = "torch.aten.reciprocal"(%18708) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18715 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%18716 = "torch.aten.mul.Scalar"(%18714, %18715) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18717 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18718 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18719 = "torch.aten.sub.Scalar"(%18716, %18717, %18718) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%18720 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18721 = "torch.aten.div.Scalar"(%18719, %18720) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18722 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18723 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18724 = "torch.aten.rsub.Scalar"(%18721, %18722, %18723) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%18725 = "torch.aten.mul.Tensor"(%18724, %18713) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18726 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%18727 = "torch.aten.div.Scalar"(%18725, %18726) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18728 = "torch.aten.mul.Tensor"(%18721, %18713) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18729 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18730 = "torch.aten.add.Tensor"(%18727, %18728, %18729) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18731 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%18732 = "torch.aten.lt.Scalar"(%18708, %18731) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%18733 = "torch.aten.bitwise_not"(%18732) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%18734 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%18735 = "torch.aten.gt.Scalar"(%18708, %18734) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%18736 = "torch.aten.bitwise_not"(%18735) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%18737 = "torch.aten.mul.Tensor"(%18733, %18736) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%18738 = "torch.aten.where.self"(%18737, %18730, %18713) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18739 = "torch.prim.ListConstruct"(%18738, %18738) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%18740 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%18741 = "torch.aten.cat"(%18739, %18740) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%18742 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%18743 = "torch.prims.convert_element_type"(%18688, %18742) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%18744 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%18745 = "torch.prims.convert_element_type"(%18741, %18744) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%18746 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%18747 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18748 = "torch.prim.ListConstruct"(%18746, %18747) : (!torch.int, !torch.int) -> !torch.list<int>
%18749 = "torch.aten.view"(%18743, %18748) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%18750 = "torch.aten.mul.Tensor"(%18749, %18745) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%18751 = "torch.aten.cos"(%18750) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%18752 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%18753 = "torch.prims.convert_element_type"(%18751, %18752) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%18754 = "torch.aten.sin"(%18750) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%18755 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%18756 = "torch.prims.convert_element_type"(%18754, %18755) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%18757 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18758 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18759 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18760 = "torch.aten.slice.Tensor"(%18753, %18757, %18758, %18481, %18759) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%18760, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%18761 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18762 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18763 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18764 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18765 = "torch.aten.slice.Tensor"(%18760, %18761, %18762, %18763, %18764) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%18765, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%18766 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18767 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18768 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18769 = "torch.aten.slice.Tensor"(%18756, %18766, %18767, %18481, %18768) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%18769, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%18770 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18771 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18772 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18773 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18774 = "torch.aten.slice.Tensor"(%18769, %18770, %18771, %18772, %18773) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%18774, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%18775 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18776 = "torch.aten.unsqueeze"(%18765, %18775) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%18776, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%18777 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18778 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18779 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18780 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18781 = "torch.aten.slice.Tensor"(%18776, %18777, %18778, %18779, %18780) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%18781, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%18782 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18783 = "torch.aten.unsqueeze"(%18781, %18782) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18783, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%18784 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18785 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18786 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18787 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18788 = "torch.aten.slice.Tensor"(%18783, %18784, %18785, %18786, %18787) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18788, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%18789 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18790 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18791 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18792 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18793 = "torch.prim.ListConstruct"(%18789, %18790, %18791, %18792) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18794 = "torch.aten.repeat"(%18788, %18793) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18794, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%18795 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18796 = "torch.aten.unsqueeze"(%18774, %18795) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%18796, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%18797 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18798 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18799 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18800 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18801 = "torch.aten.slice.Tensor"(%18796, %18797, %18798, %18799, %18800) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%18801, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%18802 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18803 = "torch.aten.unsqueeze"(%18801, %18802) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18803, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%18804 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18805 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18806 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18807 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18808 = "torch.aten.slice.Tensor"(%18803, %18804, %18805, %18806, %18807) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18808, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%18809 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18810 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18811 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18812 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18813 = "torch.prim.ListConstruct"(%18809, %18810, %18811, %18812) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18814 = "torch.aten.repeat"(%18808, %18813) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18814, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%18815 = "torch.aten.mul.Tensor"(%18672, %18794) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18815, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%18816 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18817 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18818 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%18819 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18820 = "torch.aten.slice.Tensor"(%18672, %18816, %18817, %18818, %18819) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18820, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%18821 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18822 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%18823 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18824 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18825 = "torch.aten.slice.Tensor"(%18672, %18821, %18822, %18823, %18824) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18825, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%18826 = "torch.aten.neg"(%18825) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18826, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%18827 = "torch.prim.ListConstruct"(%18826, %18820) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%18828 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%18829 = "torch.aten.cat"(%18827, %18828) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18829, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%18830 = "torch.aten.mul.Tensor"(%18829, %18814) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18830, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%18831 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18832 = "torch.aten.add.Tensor"(%18815, %18830, %18831) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18832, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%18833 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%18834 = "torch.constant.none"() : () -> !torch.none
%18835 = "torch.constant.none"() : () -> !torch.none
%18836 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%18837 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18838 = "torch.aten.arange"(%18833, %18834, %18835, %18836, %18837) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%18839 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18840 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%18841 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18842 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18843 = "torch.constant.none"() : () -> !torch.none
%18844 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%18845 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%18846 = "torch.aten.arange.start_step"(%18839, %18840, %18841, %18842, %18843, %18844, %18845) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%18847 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%18848 = "torch.prims.convert_element_type"(%18846, %18847) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%18849 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%18850 = "torch.aten.div.Scalar"(%18848, %18849) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18851 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%18852 = "torch.aten.pow.Scalar"(%18851, %18850) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18853 = "torch.aten.reciprocal"(%18852) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18854 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%18855 = "torch.aten.mul.Scalar"(%18853, %18854) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%18856 = "torch.aten.reciprocal"(%18855) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18857 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%18858 = "torch.aten.mul.Scalar"(%18856, %18857) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%18859 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%18860 = "torch.aten.gt.Scalar"(%18858, %18859) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%18861 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%18862 = "torch.aten.div.Scalar"(%18855, %18861) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18863 = "torch.aten.where.self"(%18860, %18862, %18855) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18864 = "torch.aten.reciprocal"(%18858) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18865 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%18866 = "torch.aten.mul.Scalar"(%18864, %18865) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18867 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18868 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18869 = "torch.aten.sub.Scalar"(%18866, %18867, %18868) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%18870 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18871 = "torch.aten.div.Scalar"(%18869, %18870) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18872 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18873 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18874 = "torch.aten.rsub.Scalar"(%18871, %18872, %18873) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%18875 = "torch.aten.mul.Tensor"(%18874, %18863) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18876 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%18877 = "torch.aten.div.Scalar"(%18875, %18876) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18878 = "torch.aten.mul.Tensor"(%18871, %18863) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18879 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18880 = "torch.aten.add.Tensor"(%18877, %18878, %18879) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%18881 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%18882 = "torch.aten.lt.Scalar"(%18858, %18881) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%18883 = "torch.aten.bitwise_not"(%18882) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%18884 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%18885 = "torch.aten.gt.Scalar"(%18858, %18884) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%18886 = "torch.aten.bitwise_not"(%18885) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%18887 = "torch.aten.mul.Tensor"(%18883, %18886) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%18888 = "torch.aten.where.self"(%18887, %18880, %18863) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%18889 = "torch.prim.ListConstruct"(%18888, %18888) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%18890 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%18891 = "torch.aten.cat"(%18889, %18890) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%18892 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%18893 = "torch.prims.convert_element_type"(%18838, %18892) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%18894 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%18895 = "torch.prims.convert_element_type"(%18891, %18894) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%18896 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%18897 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18898 = "torch.prim.ListConstruct"(%18896, %18897) : (!torch.int, !torch.int) -> !torch.list<int>
%18899 = "torch.aten.view"(%18893, %18898) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%18900 = "torch.aten.mul.Tensor"(%18899, %18895) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%18901 = "torch.aten.cos"(%18900) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%18902 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%18903 = "torch.prims.convert_element_type"(%18901, %18902) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%18904 = "torch.aten.sin"(%18900) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%18905 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%18906 = "torch.prims.convert_element_type"(%18904, %18905) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%18907 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18908 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18909 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18910 = "torch.aten.slice.Tensor"(%18903, %18907, %18908, %18481, %18909) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%18910, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%18911 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18912 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18913 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18914 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18915 = "torch.aten.slice.Tensor"(%18910, %18911, %18912, %18913, %18914) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%18915, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%18916 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18917 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18918 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18919 = "torch.aten.slice.Tensor"(%18906, %18916, %18917, %18481, %18918) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%18919, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%18920 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18921 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18922 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18923 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18924 = "torch.aten.slice.Tensor"(%18919, %18920, %18921, %18922, %18923) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%18924, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%18925 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18926 = "torch.aten.unsqueeze"(%18915, %18925) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%18926, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%18927 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18928 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18929 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18930 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18931 = "torch.aten.slice.Tensor"(%18926, %18927, %18928, %18929, %18930) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%18931, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%18932 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18933 = "torch.aten.unsqueeze"(%18931, %18932) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18933, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%18934 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18935 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18936 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18937 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18938 = "torch.aten.slice.Tensor"(%18933, %18934, %18935, %18936, %18937) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18938, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%18939 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18940 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18941 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18942 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18943 = "torch.prim.ListConstruct"(%18939, %18940, %18941, %18942) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18944 = "torch.aten.repeat"(%18938, %18943) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18944, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%18945 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18946 = "torch.aten.unsqueeze"(%18924, %18945) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%18946, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%18947 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18948 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18949 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18950 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18951 = "torch.aten.slice.Tensor"(%18946, %18947, %18948, %18949, %18950) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%18951, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%18952 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18953 = "torch.aten.unsqueeze"(%18951, %18952) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18953, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%18954 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18955 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18956 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18957 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18958 = "torch.aten.slice.Tensor"(%18953, %18954, %18955, %18956, %18957) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18958, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%18959 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%18960 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18961 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18962 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18963 = "torch.prim.ListConstruct"(%18959, %18960, %18961, %18962) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18964 = "torch.aten.repeat"(%18958, %18963) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%18964, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%18965 = "torch.aten.mul.Tensor"(%18677, %18944) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18965, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%18966 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18967 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%18968 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%18969 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18970 = "torch.aten.slice.Tensor"(%18677, %18966, %18967, %18968, %18969) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18970, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%18971 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%18972 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%18973 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%18974 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18975 = "torch.aten.slice.Tensor"(%18677, %18971, %18972, %18973, %18974) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18975, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%18976 = "torch.aten.neg"(%18975) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18976, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%18977 = "torch.prim.ListConstruct"(%18976, %18970) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%18978 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%18979 = "torch.aten.cat"(%18977, %18978) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18979, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%18980 = "torch.aten.mul.Tensor"(%18979, %18964) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18980, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%18981 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%18982 = "torch.aten.add.Tensor"(%18965, %18980, %18981) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18982, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%18983 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%18984 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18985 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%18986 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%18987 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%18988 = "torch.prim.ListConstruct"(%18479, %18983, %18984, %18985, %18986, %18987) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18989 = "torch.aten.view"(%18472, %18988) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18989, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%18990 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%18991 = "torch.aten.mul.int"(%18479, %18990) : (!torch.int, !torch.int) -> !torch.int
%18992 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%18993 = "torch.aten.mul.int"(%18991, %18992) : (!torch.int, !torch.int) -> !torch.int
%18994 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%18995 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%18996 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%18997 = "torch.prim.ListConstruct"(%18993, %18994, %18995, %18996) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%18998 = "torch.aten.view"(%18989, %18997) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%18998, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%18999 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%19000 = "torch.aten.mul.Scalar"(%arg69, %18999) : (!torch.vtensor<[4,?],si64>, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%19000, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%19001 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19002 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19003 = "torch.aten.add.Scalar"(%19000, %19001, %19002) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%19003, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%19004 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19005 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19006 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19007 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19008 = "torch.prim.ListConstruct"(%19004, %18477, %19005, %19006, %19007) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19009 = "torch.aten.view"(%18982, %19008) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19009, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19010 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19011 = "torch.aten.mul.int"(%19010, %18477) : (!torch.int, !torch.int) -> !torch.int
%19012 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19013 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19014 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19015 = "torch.prim.ListConstruct"(%19011, %19012, %19013, %19014) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19016 = "torch.aten.view"(%19009, %19015) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19016, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19017 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%19018 = "torch.aten.view"(%19003, %19017) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%19018, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%19019 = "torch.prim.ListConstruct"(%19018) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%19020 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19021 = "torch.aten.index_put"(%18998, %19019, %19016, %19020) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19021, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19022 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19023 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19024 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19025 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19026 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19027 = "torch.prim.ListConstruct"(%18479, %19022, %19023, %19024, %19025, %19026) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19028 = "torch.aten.view"(%19021, %19027) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19028, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19029 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%19030 = "torch.prim.ListConstruct"(%18479, %19029) : (!torch.int, !torch.int) -> !torch.list<int>
%19031 = "torch.aten.view"(%19028, %19030) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19031, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%19032 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19033 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19034 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19035 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19036 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19037 = "torch.prim.ListConstruct"(%18479, %19032, %19033, %19034, %19035, %19036) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19038 = "torch.aten.view"(%19031, %19037) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19038, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19039 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19040 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19041 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19042 = "torch.prim.ListConstruct"(%18993, %19039, %19040, %19041) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19043 = "torch.aten.view"(%19038, %19042) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19043, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19044 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19045 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19046 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19047 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19048 = "torch.prim.ListConstruct"(%19044, %18477, %19045, %19046, %19047) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19049 = "torch.aten.view"(%18682, %19048) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19049, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19050 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19051 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19052 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19053 = "torch.prim.ListConstruct"(%19011, %19050, %19051, %19052) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19054 = "torch.aten.view"(%19049, %19053) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19054, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19055 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19056 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19057 = "torch.aten.add.Scalar"(%19003, %19055, %19056) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%19057, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%19058 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%19059 = "torch.aten.view"(%19057, %19058) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%19059, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%19060 = "torch.prim.ListConstruct"(%19059) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%19061 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19062 = "torch.aten.index_put"(%19043, %19060, %19054, %19061) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19062, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19063 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19064 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19065 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19066 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19067 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19068 = "torch.prim.ListConstruct"(%18479, %19063, %19064, %19065, %19066, %19067) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19069 = "torch.aten.view"(%19062, %19068) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19069, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19070 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%19071 = "torch.prim.ListConstruct"(%18479, %19070) : (!torch.int, !torch.int) -> !torch.list<int>
%19072 = "torch.aten.view"(%19069, %19071) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19072, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%19073 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%19074 = "torch.aten.unsqueeze"(%18982, %19073) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19074, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%19075 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19076 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19077 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19078 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19079 = "torch.prim.ListConstruct"(%19075, %18481, %19076, %19077, %19078) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19080 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19081 = "torch.aten.expand"(%19074, %19079, %19080) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19081, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%19082 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19083 = "torch.aten.clone"(%19081, %19082) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19083, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%19084 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19085 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19086 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19087 = "torch.prim.ListConstruct"(%19084, %18481, %19085, %19086) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19088 = "torch.aten._unsafe_view"(%19083, %19087) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19088, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%19089 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%19090 = "torch.aten.unsqueeze"(%18682, %19089) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19090, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%19091 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19092 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19093 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19094 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19095 = "torch.prim.ListConstruct"(%19091, %18481, %19092, %19093, %19094) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19096 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19097 = "torch.aten.expand"(%19090, %19095, %19096) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19097, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%19098 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19099 = "torch.aten.clone"(%19097, %19098) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19099, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%19100 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19101 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19102 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19103 = "torch.prim.ListConstruct"(%19100, %18481, %19101, %19102) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19104 = "torch.aten._unsafe_view"(%19099, %19103) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19104, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%19105 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19106 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19107 = "torch.aten.transpose.int"(%18832, %19105, %19106) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19107, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%19108 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19109 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19110 = "torch.aten.transpose.int"(%19088, %19108, %19109) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19110, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%19111 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19112 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19113 = "torch.aten.transpose.int"(%19104, %19111, %19112) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19113, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%19114 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19115 = "torch.aten.squeeze.dim"(%18570, %19114) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19115, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%19116 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19117 = "torch.aten.squeeze.dim"(%19115, %19116) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19117, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%19118 = "torch_c.to_builtin_tensor"(%19107) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%19119 = "torch_c.to_builtin_tensor"(%19110) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%19120 = "torch_c.to_builtin_tensor"(%19113) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%19121 = "torch_c.to_builtin_tensor"(%19117) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>) -> tensor<4x1x?x?xf8E4M3FNUZ>
%19122 = "tensor.cast"(%19121) : (tensor<4x1x?x?xf8E4M3FNUZ>) -> tensor<?x?x?x?xf8E4M3FNUZ>
%19123 = "torch_c.to_builtin_tensor"(%17209) : (!torch.vtensor<[],f32>) -> tensor<f32>
%19124 = "util.call"(%19118, %19119, %19120, %19123, %19122) <{callee = @sharktank_masked_flash_attention_4_32_128_128_f8E4M3FNUZ_f32_f32}> : (tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<f32>, tensor<?x?x?x?xf8E4M3FNUZ>) -> tensor<4x32x?x128xf32>
%19125 = "torch_c.from_builtin_tensor"(%19124) : (tensor<4x32x?x128xf32>) -> !torch.vtensor<[4,32,?,128],f32>
"torch.bind_symbolic_shape"(%19125, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f32>, !torch.int) -> ()
%19126 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19127 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19128 = "torch.aten.transpose.int"(%19125, %19126, %19127) : (!torch.vtensor<[4,32,?,128],f32>, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%19128, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%19129 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19130 = "torch.aten.clone"(%19128, %19129) : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%19130, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%19131 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19132 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19133 = "torch.prim.ListConstruct"(%19131, %18481, %19132) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19134 = "torch.aten._unsafe_view"(%19130, %19133) : (!torch.vtensor<[4,?,32,128],f32>, !torch.list<int>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19134, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19135 = "torch.aten.div.Tensor"(%19134, %17211) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19135, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19136 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19137 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19138 = "torch.aten.clamp"(%19135, %19136, %19137) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19138, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19139 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19140 = "torch.prims.convert_element_type"(%19138, %19139) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19140, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19141 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19142 = "torch.aten.unsqueeze"(%17213, %19141) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%19143 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19144 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19145 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19146 = "torch.prim.ListConstruct"(%19143, %19144, %19145) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19147 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19148 = "torch.aten.expand"(%19142, %19146, %19147) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%19149 = "torch_c.to_builtin_tensor"(%19140) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19150 = "torch_c.to_builtin_tensor"(%19148) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%19151 = "util.call"(%19149, %19150) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%19152 = "torch_c.from_builtin_tensor"(%19151) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19152, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19153 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19154 = "torch.prims.convert_element_type"(%19152, %19153) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19154, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19155 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19156 = "torch.aten.add.Tensor"(%18576, %19154, %19155) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19156, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19157 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%19158 = "torch.prims.convert_element_type"(%19156, %19157) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19158, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19159 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19160 = "torch.aten.pow.Tensor_Scalar"(%19158, %19159) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19160, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19161 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%19162 = "torch.prim.ListConstruct"(%19161) : (!torch.int) -> !torch.list<int>
%19163 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%19164 = "torch.constant.none"() : () -> !torch.none
%19165 = "torch.aten.mean.dim"(%19160, %19162, %19163, %19164) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19165, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19166 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%19167 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19168 = "torch.aten.add.Scalar"(%19165, %19166, %19167) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19168, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19169 = "torch.aten.rsqrt"(%19168) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19169, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19170 = "torch.aten.mul.Tensor"(%19158, %19169) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19170, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19171 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19172 = "torch.prims.convert_element_type"(%19170, %19171) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19172, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19173 = "torch.aten.mul.Tensor"(%17215, %19172) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19173, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19174 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19175 = "torch.prims.convert_element_type"(%19173, %19174) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19175, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19176 = "torch.aten.div.Tensor"(%19175, %17217) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19176, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19177 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19178 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19179 = "torch.aten.clamp"(%19176, %19177, %19178) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19179, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19180 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19181 = "torch.prims.convert_element_type"(%19179, %19180) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19181, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19182 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19183 = "torch.aten.unsqueeze"(%17219, %19182) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%19184 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19185 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%19186 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19187 = "torch.prim.ListConstruct"(%19184, %19185, %19186) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19188 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19189 = "torch.aten.expand"(%19183, %19187, %19188) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%19190 = "torch_c.to_builtin_tensor"(%19181) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19191 = "torch_c.to_builtin_tensor"(%19189) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%19192 = "util.call"(%19190, %19191) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%19193 = "torch_c.from_builtin_tensor"(%19192) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%19193, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%19194 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19195 = "torch.prims.convert_element_type"(%19193, %19194) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19195, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19196 = "torch.aten.silu"(%19195) : (!torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19196, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19197 = "torch.aten.div.Tensor"(%19175, %17221) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19197, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19198 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19199 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19200 = "torch.aten.clamp"(%19197, %19198, %19199) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19200, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19201 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19202 = "torch.prims.convert_element_type"(%19200, %19201) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19202, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19203 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19204 = "torch.aten.unsqueeze"(%17223, %19203) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%19205 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19206 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%19207 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19208 = "torch.prim.ListConstruct"(%19205, %19206, %19207) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19209 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19210 = "torch.aten.expand"(%19204, %19208, %19209) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%19211 = "torch_c.to_builtin_tensor"(%19202) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19212 = "torch_c.to_builtin_tensor"(%19210) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%19213 = "util.call"(%19211, %19212) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%19214 = "torch_c.from_builtin_tensor"(%19213) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%19214, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%19215 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19216 = "torch.prims.convert_element_type"(%19214, %19215) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19216, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19217 = "torch.aten.mul.Tensor"(%19196, %19216) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19217, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19218 = "torch.aten.div.Tensor"(%19217, %17225) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19218, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19219 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19220 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19221 = "torch.aten.clamp"(%19218, %19219, %19220) : (!torch.vtensor<[4,?,14336],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19221, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19222 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19223 = "torch.prims.convert_element_type"(%19221, %19222) : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> !torch.vtensor<[4,?,14336],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19223, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>, !torch.int) -> ()
%19224 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19225 = "torch.aten.unsqueeze"(%17227, %19224) : (!torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,14336],f8E4M3FNUZ>
%19226 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19227 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19228 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%19229 = "torch.prim.ListConstruct"(%19226, %19227, %19228) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19230 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19231 = "torch.aten.expand"(%19225, %19229, %19230) : (!torch.vtensor<[1,4096,14336],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,14336],f8E4M3FNUZ>
%19232 = "torch_c.to_builtin_tensor"(%19223) : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>) -> tensor<4x?x14336xf8E4M3FNUZ>
%19233 = "torch_c.to_builtin_tensor"(%19231) : (!torch.vtensor<[4,4096,14336],f8E4M3FNUZ>) -> tensor<4x4096x14336xf8E4M3FNUZ>
%19234 = "util.call"(%19232, %19233) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx14336xf8E4M3FNUZ_R4x4096x14336xf8E4M3FNUZ}> : (tensor<4x?x14336xf8E4M3FNUZ>, tensor<4x4096x14336xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%19235 = "torch_c.from_builtin_tensor"(%19234) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19235, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19236 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19237 = "torch.prims.convert_element_type"(%19235, %19236) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19237, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19238 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19239 = "torch.aten.add.Tensor"(%19156, %19237, %19238) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19239, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19240 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%19241 = "torch.prims.convert_element_type"(%19239, %19240) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19241, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19242 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19243 = "torch.aten.pow.Tensor_Scalar"(%19241, %19242) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19243, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19244 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%19245 = "torch.prim.ListConstruct"(%19244) : (!torch.int) -> !torch.list<int>
%19246 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%19247 = "torch.constant.none"() : () -> !torch.none
%19248 = "torch.aten.mean.dim"(%19243, %19245, %19246, %19247) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19248, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19249 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%19250 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19251 = "torch.aten.add.Scalar"(%19248, %19249, %19250) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19251, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19252 = "torch.aten.rsqrt"(%19251) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19252, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19253 = "torch.aten.mul.Tensor"(%19241, %19252) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19253, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19254 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19255 = "torch.prims.convert_element_type"(%19253, %19254) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19255, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19256 = "torch.aten.mul.Tensor"(%17229, %19255) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19256, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19257 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19258 = "torch.prims.convert_element_type"(%19256, %19257) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19258, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19259 = "torch.aten.div.Tensor"(%19258, %17231) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19259, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19260 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19261 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19262 = "torch.aten.clamp"(%19259, %19260, %19261) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19262, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19263 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19264 = "torch.prims.convert_element_type"(%19262, %19263) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19264, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19265 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19266 = "torch.aten.unsqueeze"(%17233, %19265) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%19267 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19268 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19269 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19270 = "torch.prim.ListConstruct"(%19267, %19268, %19269) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19271 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19272 = "torch.aten.expand"(%19266, %19270, %19271) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%19273 = "torch_c.to_builtin_tensor"(%19264) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19274 = "torch_c.to_builtin_tensor"(%19272) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%19275 = "util.call"(%19273, %19274) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%19276 = "torch_c.from_builtin_tensor"(%19275) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19276, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19277 = "torch.aten.div.Tensor"(%19276, %17235) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19277, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19278 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19279 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19280 = "torch.aten.clamp"(%19277, %19278, %19279) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19280, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19281 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19282 = "torch.prims.convert_element_type"(%19280, %19281) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19282, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19283 = "torch.aten.div.Tensor"(%19258, %17237) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19283, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19284 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19285 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19286 = "torch.aten.clamp"(%19283, %19284, %19285) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19286, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19287 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19288 = "torch.prims.convert_element_type"(%19286, %19287) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19288, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19289 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19290 = "torch.aten.unsqueeze"(%17239, %19289) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%19291 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19292 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%19293 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19294 = "torch.prim.ListConstruct"(%19291, %19292, %19293) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19295 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19296 = "torch.aten.expand"(%19290, %19294, %19295) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%19297 = "torch_c.to_builtin_tensor"(%19288) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19298 = "torch_c.to_builtin_tensor"(%19296) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%19299 = "util.call"(%19297, %19298) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%19300 = "torch_c.from_builtin_tensor"(%19299) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19300, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19301 = "torch.aten.div.Tensor"(%19300, %17241) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19301, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19302 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19303 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19304 = "torch.aten.clamp"(%19301, %19302, %19303) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19304, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19305 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19306 = "torch.prims.convert_element_type"(%19304, %19305) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19306, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%19307 = "torch.aten.div.Tensor"(%19258, %17243) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19307, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19308 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19309 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19310 = "torch.aten.clamp"(%19307, %19308, %19309) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19310, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19311 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19312 = "torch.prims.convert_element_type"(%19310, %19311) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19312, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19313 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19314 = "torch.aten.unsqueeze"(%17245, %19313) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%19315 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19316 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%19317 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19318 = "torch.prim.ListConstruct"(%19315, %19316, %19317) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19319 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19320 = "torch.aten.expand"(%19314, %19318, %19319) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%19321 = "torch_c.to_builtin_tensor"(%19312) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19322 = "torch_c.to_builtin_tensor"(%19320) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%19323 = "util.call"(%19321, %19322) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%19324 = "torch_c.from_builtin_tensor"(%19323) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19324, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19325 = "torch.aten.div.Tensor"(%19324, %17247) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19325, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19326 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19327 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19328 = "torch.aten.clamp"(%19325, %19326, %19327) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19328, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19329 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19330 = "torch.prims.convert_element_type"(%19328, %19329) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19330, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%19331 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19332 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19333 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19334 = "torch.prim.ListConstruct"(%19331, %18481, %19332, %19333) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19335 = "torch.aten.view"(%19282, %19334) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19335, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%19336 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19337 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19338 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19339 = "torch.prim.ListConstruct"(%19336, %18481, %19337, %19338) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19340 = "torch.aten.view"(%19306, %19339) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19340, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19341 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19342 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19343 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19344 = "torch.prim.ListConstruct"(%19341, %18481, %19342, %19343) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19345 = "torch.aten.view"(%19330, %19344) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19345, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19346 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%19347 = "torch.constant.none"() : () -> !torch.none
%19348 = "torch.constant.none"() : () -> !torch.none
%19349 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%19350 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19351 = "torch.aten.arange"(%19346, %19347, %19348, %19349, %19350) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%19352 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19353 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19354 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19355 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19356 = "torch.constant.none"() : () -> !torch.none
%19357 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%19358 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19359 = "torch.aten.arange.start_step"(%19352, %19353, %19354, %19355, %19356, %19357, %19358) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%19360 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%19361 = "torch.prims.convert_element_type"(%19359, %19360) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%19362 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19363 = "torch.aten.div.Scalar"(%19361, %19362) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19364 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%19365 = "torch.aten.pow.Scalar"(%19364, %19363) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19366 = "torch.aten.reciprocal"(%19365) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19367 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%19368 = "torch.aten.mul.Scalar"(%19366, %19367) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%19369 = "torch.aten.reciprocal"(%19368) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19370 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%19371 = "torch.aten.mul.Scalar"(%19369, %19370) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%19372 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%19373 = "torch.aten.gt.Scalar"(%19371, %19372) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%19374 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19375 = "torch.aten.div.Scalar"(%19368, %19374) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19376 = "torch.aten.where.self"(%19373, %19375, %19368) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19377 = "torch.aten.reciprocal"(%19371) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19378 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%19379 = "torch.aten.mul.Scalar"(%19377, %19378) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19380 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19381 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19382 = "torch.aten.sub.Scalar"(%19379, %19380, %19381) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%19383 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%19384 = "torch.aten.div.Scalar"(%19382, %19383) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19385 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19386 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19387 = "torch.aten.rsub.Scalar"(%19384, %19385, %19386) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%19388 = "torch.aten.mul.Tensor"(%19387, %19376) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19389 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19390 = "torch.aten.div.Scalar"(%19388, %19389) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19391 = "torch.aten.mul.Tensor"(%19384, %19376) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19392 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19393 = "torch.aten.add.Tensor"(%19390, %19391, %19392) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19394 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%19395 = "torch.aten.lt.Scalar"(%19371, %19394) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%19396 = "torch.aten.bitwise_not"(%19395) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%19397 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%19398 = "torch.aten.gt.Scalar"(%19371, %19397) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%19399 = "torch.aten.bitwise_not"(%19398) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%19400 = "torch.aten.mul.Tensor"(%19396, %19399) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%19401 = "torch.aten.where.self"(%19400, %19393, %19376) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19402 = "torch.prim.ListConstruct"(%19401, %19401) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%19403 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%19404 = "torch.aten.cat"(%19402, %19403) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%19405 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%19406 = "torch.prims.convert_element_type"(%19351, %19405) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%19407 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%19408 = "torch.prims.convert_element_type"(%19404, %19407) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%19409 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%19410 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19411 = "torch.prim.ListConstruct"(%19409, %19410) : (!torch.int, !torch.int) -> !torch.list<int>
%19412 = "torch.aten.view"(%19406, %19411) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%19413 = "torch.aten.mul.Tensor"(%19412, %19408) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%19414 = "torch.aten.cos"(%19413) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%19415 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19416 = "torch.prims.convert_element_type"(%19414, %19415) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%19417 = "torch.aten.sin"(%19413) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%19418 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19419 = "torch.prims.convert_element_type"(%19417, %19418) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%19420 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19421 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19422 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19423 = "torch.aten.slice.Tensor"(%19416, %19420, %19421, %18481, %19422) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%19423, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%19424 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19425 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19426 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19427 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19428 = "torch.aten.slice.Tensor"(%19423, %19424, %19425, %19426, %19427) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%19428, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%19429 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19430 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19431 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19432 = "torch.aten.slice.Tensor"(%19419, %19429, %19430, %18481, %19431) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%19432, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%19433 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19434 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19435 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19436 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19437 = "torch.aten.slice.Tensor"(%19432, %19433, %19434, %19435, %19436) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%19437, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%19438 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19439 = "torch.aten.unsqueeze"(%19428, %19438) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%19439, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%19440 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19441 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19442 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19443 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19444 = "torch.aten.slice.Tensor"(%19439, %19440, %19441, %19442, %19443) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%19444, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%19445 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19446 = "torch.aten.unsqueeze"(%19444, %19445) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19446, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%19447 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%19448 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19449 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19450 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19451 = "torch.aten.slice.Tensor"(%19446, %19447, %19448, %19449, %19450) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19451, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%19452 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19453 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19454 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19455 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19456 = "torch.prim.ListConstruct"(%19452, %19453, %19454, %19455) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19457 = "torch.aten.repeat"(%19451, %19456) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19457, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%19458 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19459 = "torch.aten.unsqueeze"(%19437, %19458) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%19459, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%19460 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19461 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19462 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19463 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19464 = "torch.aten.slice.Tensor"(%19459, %19460, %19461, %19462, %19463) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%19464, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%19465 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19466 = "torch.aten.unsqueeze"(%19464, %19465) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19466, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%19467 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%19468 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19469 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19470 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19471 = "torch.aten.slice.Tensor"(%19466, %19467, %19468, %19469, %19470) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19471, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%19472 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19473 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19474 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19475 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19476 = "torch.prim.ListConstruct"(%19472, %19473, %19474, %19475) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19477 = "torch.aten.repeat"(%19471, %19476) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19477, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%19478 = "torch.aten.mul.Tensor"(%19335, %19457) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19478, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%19479 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%19480 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19481 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%19482 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19483 = "torch.aten.slice.Tensor"(%19335, %19479, %19480, %19481, %19482) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19483, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%19484 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%19485 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%19486 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19487 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19488 = "torch.aten.slice.Tensor"(%19335, %19484, %19485, %19486, %19487) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19488, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%19489 = "torch.aten.neg"(%19488) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19489, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%19490 = "torch.prim.ListConstruct"(%19489, %19483) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%19491 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%19492 = "torch.aten.cat"(%19490, %19491) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19492, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%19493 = "torch.aten.mul.Tensor"(%19492, %19477) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19493, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%19494 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19495 = "torch.aten.add.Tensor"(%19478, %19493, %19494) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19495, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%19496 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%19497 = "torch.constant.none"() : () -> !torch.none
%19498 = "torch.constant.none"() : () -> !torch.none
%19499 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%19500 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19501 = "torch.aten.arange"(%19496, %19497, %19498, %19499, %19500) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%19502 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19503 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19504 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19505 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19506 = "torch.constant.none"() : () -> !torch.none
%19507 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%19508 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19509 = "torch.aten.arange.start_step"(%19502, %19503, %19504, %19505, %19506, %19507, %19508) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%19510 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%19511 = "torch.prims.convert_element_type"(%19509, %19510) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%19512 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19513 = "torch.aten.div.Scalar"(%19511, %19512) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19514 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%19515 = "torch.aten.pow.Scalar"(%19514, %19513) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19516 = "torch.aten.reciprocal"(%19515) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19517 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%19518 = "torch.aten.mul.Scalar"(%19516, %19517) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%19519 = "torch.aten.reciprocal"(%19518) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19520 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%19521 = "torch.aten.mul.Scalar"(%19519, %19520) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%19522 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%19523 = "torch.aten.gt.Scalar"(%19521, %19522) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%19524 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19525 = "torch.aten.div.Scalar"(%19518, %19524) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19526 = "torch.aten.where.self"(%19523, %19525, %19518) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19527 = "torch.aten.reciprocal"(%19521) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19528 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%19529 = "torch.aten.mul.Scalar"(%19527, %19528) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19530 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19531 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19532 = "torch.aten.sub.Scalar"(%19529, %19530, %19531) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%19533 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%19534 = "torch.aten.div.Scalar"(%19532, %19533) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19535 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19536 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19537 = "torch.aten.rsub.Scalar"(%19534, %19535, %19536) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%19538 = "torch.aten.mul.Tensor"(%19537, %19526) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19539 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19540 = "torch.aten.div.Scalar"(%19538, %19539) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19541 = "torch.aten.mul.Tensor"(%19534, %19526) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19542 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19543 = "torch.aten.add.Tensor"(%19540, %19541, %19542) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%19544 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%19545 = "torch.aten.lt.Scalar"(%19521, %19544) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%19546 = "torch.aten.bitwise_not"(%19545) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%19547 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%19548 = "torch.aten.gt.Scalar"(%19521, %19547) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%19549 = "torch.aten.bitwise_not"(%19548) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%19550 = "torch.aten.mul.Tensor"(%19546, %19549) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%19551 = "torch.aten.where.self"(%19550, %19543, %19526) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%19552 = "torch.prim.ListConstruct"(%19551, %19551) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%19553 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%19554 = "torch.aten.cat"(%19552, %19553) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%19555 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%19556 = "torch.prims.convert_element_type"(%19501, %19555) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%19557 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%19558 = "torch.prims.convert_element_type"(%19554, %19557) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%19559 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%19560 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19561 = "torch.prim.ListConstruct"(%19559, %19560) : (!torch.int, !torch.int) -> !torch.list<int>
%19562 = "torch.aten.view"(%19556, %19561) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%19563 = "torch.aten.mul.Tensor"(%19562, %19558) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%19564 = "torch.aten.cos"(%19563) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%19565 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19566 = "torch.prims.convert_element_type"(%19564, %19565) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%19567 = "torch.aten.sin"(%19563) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%19568 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19569 = "torch.prims.convert_element_type"(%19567, %19568) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%19570 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19571 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19572 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19573 = "torch.aten.slice.Tensor"(%19566, %19570, %19571, %18481, %19572) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%19573, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%19574 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19575 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19576 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19577 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19578 = "torch.aten.slice.Tensor"(%19573, %19574, %19575, %19576, %19577) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%19578, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%19579 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19580 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19581 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19582 = "torch.aten.slice.Tensor"(%19569, %19579, %19580, %18481, %19581) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%19582, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%19583 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19584 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19585 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19586 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19587 = "torch.aten.slice.Tensor"(%19582, %19583, %19584, %19585, %19586) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%19587, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%19588 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19589 = "torch.aten.unsqueeze"(%19578, %19588) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%19589, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%19590 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19591 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19592 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19593 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19594 = "torch.aten.slice.Tensor"(%19589, %19590, %19591, %19592, %19593) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%19594, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%19595 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19596 = "torch.aten.unsqueeze"(%19594, %19595) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19596, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%19597 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%19598 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19599 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19600 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19601 = "torch.aten.slice.Tensor"(%19596, %19597, %19598, %19599, %19600) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19601, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%19602 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19603 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19604 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19605 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19606 = "torch.prim.ListConstruct"(%19602, %19603, %19604, %19605) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19607 = "torch.aten.repeat"(%19601, %19606) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19607, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%19608 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19609 = "torch.aten.unsqueeze"(%19587, %19608) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%19609, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%19610 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19611 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19612 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19613 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19614 = "torch.aten.slice.Tensor"(%19609, %19610, %19611, %19612, %19613) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%19614, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%19615 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19616 = "torch.aten.unsqueeze"(%19614, %19615) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19616, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%19617 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%19618 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19619 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19620 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19621 = "torch.aten.slice.Tensor"(%19616, %19617, %19618, %19619, %19620) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19621, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%19622 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19623 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19624 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19625 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19626 = "torch.prim.ListConstruct"(%19622, %19623, %19624, %19625) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19627 = "torch.aten.repeat"(%19621, %19626) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%19627, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%19628 = "torch.aten.mul.Tensor"(%19340, %19607) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19628, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19629 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%19630 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19631 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%19632 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19633 = "torch.aten.slice.Tensor"(%19340, %19629, %19630, %19631, %19632) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19633, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%19634 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%19635 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%19636 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%19637 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19638 = "torch.aten.slice.Tensor"(%19340, %19634, %19635, %19636, %19637) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19638, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%19639 = "torch.aten.neg"(%19638) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19639, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%19640 = "torch.prim.ListConstruct"(%19639, %19633) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%19641 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%19642 = "torch.aten.cat"(%19640, %19641) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19642, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19643 = "torch.aten.mul.Tensor"(%19642, %19627) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19643, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19644 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19645 = "torch.aten.add.Tensor"(%19628, %19643, %19644) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19645, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19646 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%19647 = "torch.aten.mul.Scalar"(%arg69, %19646) : (!torch.vtensor<[4,?],si64>, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%19647, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%19648 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19649 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19650 = "torch.aten.add.Scalar"(%19647, %19648, %19649) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%19650, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%19651 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19652 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19653 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19654 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19655 = "torch.prim.ListConstruct"(%19651, %18477, %19652, %19653, %19654) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19656 = "torch.aten.view"(%19645, %19655) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19656, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19657 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19658 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19659 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19660 = "torch.prim.ListConstruct"(%19011, %19657, %19658, %19659) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19661 = "torch.aten.view"(%19656, %19660) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19661, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19662 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%19663 = "torch.aten.view"(%19650, %19662) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%19663, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%19664 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19665 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19666 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19667 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19668 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19669 = "torch.prim.ListConstruct"(%18479, %19664, %19665, %19666, %19667, %19668) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19670 = "torch.aten.view"(%19072, %19669) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19670, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19671 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19672 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19673 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19674 = "torch.prim.ListConstruct"(%18993, %19671, %19672, %19673) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19675 = "torch.aten.view"(%19670, %19674) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19675, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19676 = "torch.prim.ListConstruct"(%19663) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%19677 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19678 = "torch.aten.index_put"(%19675, %19676, %19661, %19677) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19678, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19679 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19680 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19681 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19682 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19683 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19684 = "torch.prim.ListConstruct"(%18479, %19679, %19680, %19681, %19682, %19683) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19685 = "torch.aten.view"(%19678, %19684) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19685, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19686 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%19687 = "torch.prim.ListConstruct"(%18479, %19686) : (!torch.int, !torch.int) -> !torch.list<int>
%19688 = "torch.aten.view"(%19685, %19687) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19688, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%19689 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19690 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19691 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19692 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19693 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19694 = "torch.prim.ListConstruct"(%18479, %19689, %19690, %19691, %19692, %19693) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19695 = "torch.aten.view"(%19688, %19694) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19695, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19696 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19697 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19698 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19699 = "torch.prim.ListConstruct"(%18993, %19696, %19697, %19698) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19700 = "torch.aten.view"(%19695, %19699) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19700, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19701 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19702 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19703 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19704 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19705 = "torch.prim.ListConstruct"(%19701, %18477, %19702, %19703, %19704) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19706 = "torch.aten.view"(%19345, %19705) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19706, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19707 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19708 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19709 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19710 = "torch.prim.ListConstruct"(%19011, %19707, %19708, %19709) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19711 = "torch.aten.view"(%19706, %19710) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19711, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19712 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19713 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19714 = "torch.aten.add.Scalar"(%19650, %19712, %19713) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%19714, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%19715 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%19716 = "torch.aten.view"(%19714, %19715) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%19716, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%19717 = "torch.prim.ListConstruct"(%19716) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%19718 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19719 = "torch.aten.index_put"(%19700, %19717, %19711, %19718) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19719, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19720 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19721 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19722 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19723 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19724 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19725 = "torch.prim.ListConstruct"(%18479, %19720, %19721, %19722, %19723, %19724) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19726 = "torch.aten.view"(%19719, %19725) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19726, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19727 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%19728 = "torch.prim.ListConstruct"(%18479, %19727) : (!torch.int, !torch.int) -> !torch.list<int>
%19729 = "torch.aten.view"(%19726, %19728) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19729, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%19730 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%19731 = "torch.aten.unsqueeze"(%19645, %19730) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19731, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%19732 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19733 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19734 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19735 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19736 = "torch.prim.ListConstruct"(%19732, %18481, %19733, %19734, %19735) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19737 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19738 = "torch.aten.expand"(%19731, %19736, %19737) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19738, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%19739 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19740 = "torch.aten.clone"(%19738, %19739) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19740, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%19741 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19742 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19743 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19744 = "torch.prim.ListConstruct"(%19741, %18481, %19742, %19743) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19745 = "torch.aten._unsafe_view"(%19740, %19744) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19745, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%19746 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%19747 = "torch.aten.unsqueeze"(%19345, %19746) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19747, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%19748 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19749 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19750 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19751 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19752 = "torch.prim.ListConstruct"(%19748, %18481, %19749, %19750, %19751) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19753 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19754 = "torch.aten.expand"(%19747, %19752, %19753) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19754, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%19755 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19756 = "torch.aten.clone"(%19754, %19755) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19756, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%19757 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19758 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19759 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19760 = "torch.prim.ListConstruct"(%19757, %18481, %19758, %19759) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19761 = "torch.aten._unsafe_view"(%19756, %19760) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19761, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%19762 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19763 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19764 = "torch.aten.transpose.int"(%19495, %19762, %19763) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19764, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%19765 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19766 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19767 = "torch.aten.transpose.int"(%19745, %19765, %19766) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19767, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%19768 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19769 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19770 = "torch.aten.transpose.int"(%19761, %19768, %19769) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19770, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%19771 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19772 = "torch.aten.squeeze.dim"(%18570, %19771) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19772, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%19773 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19774 = "torch.aten.squeeze.dim"(%19772, %19773) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19774, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%19775 = "torch_c.to_builtin_tensor"(%19764) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%19776 = "torch_c.to_builtin_tensor"(%19767) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%19777 = "torch_c.to_builtin_tensor"(%19770) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%19778 = "torch_c.to_builtin_tensor"(%19774) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>) -> tensor<4x1x?x?xf8E4M3FNUZ>
%19779 = "tensor.cast"(%19778) : (tensor<4x1x?x?xf8E4M3FNUZ>) -> tensor<?x?x?x?xf8E4M3FNUZ>
%19780 = "torch_c.to_builtin_tensor"(%17249) : (!torch.vtensor<[],f32>) -> tensor<f32>
%19781 = "util.call"(%19775, %19776, %19777, %19780, %19779) <{callee = @sharktank_masked_flash_attention_4_32_128_128_f8E4M3FNUZ_f32_f32}> : (tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<f32>, tensor<?x?x?x?xf8E4M3FNUZ>) -> tensor<4x32x?x128xf32>
%19782 = "torch_c.from_builtin_tensor"(%19781) : (tensor<4x32x?x128xf32>) -> !torch.vtensor<[4,32,?,128],f32>
"torch.bind_symbolic_shape"(%19782, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f32>, !torch.int) -> ()
%19783 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19784 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19785 = "torch.aten.transpose.int"(%19782, %19783, %19784) : (!torch.vtensor<[4,32,?,128],f32>, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%19785, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%19786 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19787 = "torch.aten.clone"(%19785, %19786) : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%19787, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%19788 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19789 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19790 = "torch.prim.ListConstruct"(%19788, %18481, %19789) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19791 = "torch.aten._unsafe_view"(%19787, %19790) : (!torch.vtensor<[4,?,32,128],f32>, !torch.list<int>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19791, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19792 = "torch.aten.div.Tensor"(%19791, %17251) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19792, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19793 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19794 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19795 = "torch.aten.clamp"(%19792, %19793, %19794) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19795, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19796 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19797 = "torch.prims.convert_element_type"(%19795, %19796) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19797, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19798 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19799 = "torch.aten.unsqueeze"(%17253, %19798) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%19800 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19801 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19802 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19803 = "torch.prim.ListConstruct"(%19800, %19801, %19802) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19804 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19805 = "torch.aten.expand"(%19799, %19803, %19804) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%19806 = "torch_c.to_builtin_tensor"(%19797) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19807 = "torch_c.to_builtin_tensor"(%19805) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%19808 = "util.call"(%19806, %19807) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%19809 = "torch_c.from_builtin_tensor"(%19808) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19809, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19810 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19811 = "torch.prims.convert_element_type"(%19809, %19810) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19811, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19812 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19813 = "torch.aten.add.Tensor"(%19239, %19811, %19812) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19813, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19814 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%19815 = "torch.prims.convert_element_type"(%19813, %19814) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19815, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19816 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19817 = "torch.aten.pow.Tensor_Scalar"(%19815, %19816) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19817, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19818 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%19819 = "torch.prim.ListConstruct"(%19818) : (!torch.int) -> !torch.list<int>
%19820 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%19821 = "torch.constant.none"() : () -> !torch.none
%19822 = "torch.aten.mean.dim"(%19817, %19819, %19820, %19821) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19822, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19823 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%19824 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19825 = "torch.aten.add.Scalar"(%19822, %19823, %19824) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19825, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19826 = "torch.aten.rsqrt"(%19825) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19826, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19827 = "torch.aten.mul.Tensor"(%19815, %19826) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19827, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19828 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19829 = "torch.prims.convert_element_type"(%19827, %19828) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19829, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19830 = "torch.aten.mul.Tensor"(%17255, %19829) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19830, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19831 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19832 = "torch.prims.convert_element_type"(%19830, %19831) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19832, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19833 = "torch.aten.div.Tensor"(%19832, %17257) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19833, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19834 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19835 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19836 = "torch.aten.clamp"(%19833, %19834, %19835) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19836, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19837 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19838 = "torch.prims.convert_element_type"(%19836, %19837) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19838, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19839 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19840 = "torch.aten.unsqueeze"(%17259, %19839) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%19841 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19842 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%19843 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19844 = "torch.prim.ListConstruct"(%19841, %19842, %19843) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19845 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19846 = "torch.aten.expand"(%19840, %19844, %19845) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%19847 = "torch_c.to_builtin_tensor"(%19838) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19848 = "torch_c.to_builtin_tensor"(%19846) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%19849 = "util.call"(%19847, %19848) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%19850 = "torch_c.from_builtin_tensor"(%19849) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%19850, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%19851 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19852 = "torch.prims.convert_element_type"(%19850, %19851) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19852, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19853 = "torch.aten.silu"(%19852) : (!torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19853, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19854 = "torch.aten.div.Tensor"(%19832, %17261) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19854, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19855 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19856 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19857 = "torch.aten.clamp"(%19854, %19855, %19856) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19857, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19858 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19859 = "torch.prims.convert_element_type"(%19857, %19858) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19859, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19860 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19861 = "torch.aten.unsqueeze"(%17263, %19860) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%19862 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19863 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%19864 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19865 = "torch.prim.ListConstruct"(%19862, %19863, %19864) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19866 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19867 = "torch.aten.expand"(%19861, %19865, %19866) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%19868 = "torch_c.to_builtin_tensor"(%19859) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19869 = "torch_c.to_builtin_tensor"(%19867) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%19870 = "util.call"(%19868, %19869) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%19871 = "torch_c.from_builtin_tensor"(%19870) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%19871, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%19872 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19873 = "torch.prims.convert_element_type"(%19871, %19872) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19873, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19874 = "torch.aten.mul.Tensor"(%19853, %19873) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19874, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19875 = "torch.aten.div.Tensor"(%19874, %17265) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19875, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19876 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19877 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19878 = "torch.aten.clamp"(%19875, %19876, %19877) : (!torch.vtensor<[4,?,14336],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%19878, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%19879 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19880 = "torch.prims.convert_element_type"(%19878, %19879) : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> !torch.vtensor<[4,?,14336],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19880, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>, !torch.int) -> ()
%19881 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19882 = "torch.aten.unsqueeze"(%17267, %19881) : (!torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,14336],f8E4M3FNUZ>
%19883 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19884 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19885 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%19886 = "torch.prim.ListConstruct"(%19883, %19884, %19885) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19887 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19888 = "torch.aten.expand"(%19882, %19886, %19887) : (!torch.vtensor<[1,4096,14336],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,14336],f8E4M3FNUZ>
%19889 = "torch_c.to_builtin_tensor"(%19880) : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>) -> tensor<4x?x14336xf8E4M3FNUZ>
%19890 = "torch_c.to_builtin_tensor"(%19888) : (!torch.vtensor<[4,4096,14336],f8E4M3FNUZ>) -> tensor<4x4096x14336xf8E4M3FNUZ>
%19891 = "util.call"(%19889, %19890) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx14336xf8E4M3FNUZ_R4x4096x14336xf8E4M3FNUZ}> : (tensor<4x?x14336xf8E4M3FNUZ>, tensor<4x4096x14336xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%19892 = "torch_c.from_builtin_tensor"(%19891) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19892, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19893 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19894 = "torch.prims.convert_element_type"(%19892, %19893) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19894, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19895 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19896 = "torch.aten.add.Tensor"(%19813, %19894, %19895) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19896, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19897 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%19898 = "torch.prims.convert_element_type"(%19896, %19897) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19898, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19899 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%19900 = "torch.aten.pow.Tensor_Scalar"(%19898, %19899) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19900, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19901 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%19902 = "torch.prim.ListConstruct"(%19901) : (!torch.int) -> !torch.list<int>
%19903 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%19904 = "torch.constant.none"() : () -> !torch.none
%19905 = "torch.aten.mean.dim"(%19900, %19902, %19903, %19904) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19905, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19906 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%19907 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%19908 = "torch.aten.add.Scalar"(%19905, %19906, %19907) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19908, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19909 = "torch.aten.rsqrt"(%19908) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%19909, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%19910 = "torch.aten.mul.Tensor"(%19898, %19909) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19910, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19911 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19912 = "torch.prims.convert_element_type"(%19910, %19911) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19912, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19913 = "torch.aten.mul.Tensor"(%17269, %19912) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19913, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19914 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%19915 = "torch.prims.convert_element_type"(%19913, %19914) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19915, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19916 = "torch.aten.div.Tensor"(%19915, %17271) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19916, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19917 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19918 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19919 = "torch.aten.clamp"(%19916, %19917, %19918) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19919, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19920 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19921 = "torch.prims.convert_element_type"(%19919, %19920) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19921, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19922 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19923 = "torch.aten.unsqueeze"(%17273, %19922) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%19924 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19925 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19926 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19927 = "torch.prim.ListConstruct"(%19924, %19925, %19926) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19928 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19929 = "torch.aten.expand"(%19923, %19927, %19928) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%19930 = "torch_c.to_builtin_tensor"(%19921) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19931 = "torch_c.to_builtin_tensor"(%19929) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%19932 = "util.call"(%19930, %19931) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%19933 = "torch_c.from_builtin_tensor"(%19932) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19933, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19934 = "torch.aten.div.Tensor"(%19933, %17275) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19934, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19935 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19936 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19937 = "torch.aten.clamp"(%19934, %19935, %19936) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%19937, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%19938 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19939 = "torch.prims.convert_element_type"(%19937, %19938) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19939, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19940 = "torch.aten.div.Tensor"(%19915, %17277) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19940, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19941 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19942 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19943 = "torch.aten.clamp"(%19940, %19941, %19942) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19943, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19944 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19945 = "torch.prims.convert_element_type"(%19943, %19944) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19945, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19946 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19947 = "torch.aten.unsqueeze"(%17279, %19946) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%19948 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19949 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%19950 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19951 = "torch.prim.ListConstruct"(%19948, %19949, %19950) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19952 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19953 = "torch.aten.expand"(%19947, %19951, %19952) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%19954 = "torch_c.to_builtin_tensor"(%19945) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19955 = "torch_c.to_builtin_tensor"(%19953) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%19956 = "util.call"(%19954, %19955) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%19957 = "torch_c.from_builtin_tensor"(%19956) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19957, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19958 = "torch.aten.div.Tensor"(%19957, %17281) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19958, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19959 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19960 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19961 = "torch.aten.clamp"(%19958, %19959, %19960) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19961, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19962 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19963 = "torch.prims.convert_element_type"(%19961, %19962) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19963, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%19964 = "torch.aten.div.Tensor"(%19915, %17283) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19964, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19965 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19966 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19967 = "torch.aten.clamp"(%19964, %19965, %19966) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%19967, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%19968 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19969 = "torch.prims.convert_element_type"(%19967, %19968) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19969, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%19970 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%19971 = "torch.aten.unsqueeze"(%17285, %19970) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%19972 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19973 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%19974 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%19975 = "torch.prim.ListConstruct"(%19972, %19973, %19974) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19976 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%19977 = "torch.aten.expand"(%19971, %19975, %19976) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%19978 = "torch_c.to_builtin_tensor"(%19969) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%19979 = "torch_c.to_builtin_tensor"(%19977) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%19980 = "util.call"(%19978, %19979) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%19981 = "torch_c.from_builtin_tensor"(%19980) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19981, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19982 = "torch.aten.div.Tensor"(%19981, %17287) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19982, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19983 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%19984 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%19985 = "torch.aten.clamp"(%19982, %19983, %19984) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%19985, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%19986 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%19987 = "torch.prims.convert_element_type"(%19985, %19986) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19987, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%19988 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19989 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%19990 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19991 = "torch.prim.ListConstruct"(%19988, %18481, %19989, %19990) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19992 = "torch.aten.view"(%19939, %19991) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19992, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%19993 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19994 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%19995 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%19996 = "torch.prim.ListConstruct"(%19993, %18481, %19994, %19995) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%19997 = "torch.aten.view"(%19963, %19996) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%19997, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%19998 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%19999 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20000 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20001 = "torch.prim.ListConstruct"(%19998, %18481, %19999, %20000) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20002 = "torch.aten.view"(%19987, %20001) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20002, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20003 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%20004 = "torch.constant.none"() : () -> !torch.none
%20005 = "torch.constant.none"() : () -> !torch.none
%20006 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%20007 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20008 = "torch.aten.arange"(%20003, %20004, %20005, %20006, %20007) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%20009 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20010 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20011 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20012 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20013 = "torch.constant.none"() : () -> !torch.none
%20014 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%20015 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20016 = "torch.aten.arange.start_step"(%20009, %20010, %20011, %20012, %20013, %20014, %20015) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%20017 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20018 = "torch.prims.convert_element_type"(%20016, %20017) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%20019 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20020 = "torch.aten.div.Scalar"(%20018, %20019) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20021 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%20022 = "torch.aten.pow.Scalar"(%20021, %20020) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20023 = "torch.aten.reciprocal"(%20022) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20024 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%20025 = "torch.aten.mul.Scalar"(%20023, %20024) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%20026 = "torch.aten.reciprocal"(%20025) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20027 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%20028 = "torch.aten.mul.Scalar"(%20026, %20027) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%20029 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%20030 = "torch.aten.gt.Scalar"(%20028, %20029) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20031 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20032 = "torch.aten.div.Scalar"(%20025, %20031) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20033 = "torch.aten.where.self"(%20030, %20032, %20025) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20034 = "torch.aten.reciprocal"(%20028) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20035 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%20036 = "torch.aten.mul.Scalar"(%20034, %20035) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20037 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20038 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20039 = "torch.aten.sub.Scalar"(%20036, %20037, %20038) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%20040 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20041 = "torch.aten.div.Scalar"(%20039, %20040) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20042 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20043 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20044 = "torch.aten.rsub.Scalar"(%20041, %20042, %20043) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%20045 = "torch.aten.mul.Tensor"(%20044, %20033) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20046 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20047 = "torch.aten.div.Scalar"(%20045, %20046) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20048 = "torch.aten.mul.Tensor"(%20041, %20033) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20049 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20050 = "torch.aten.add.Tensor"(%20047, %20048, %20049) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20051 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%20052 = "torch.aten.lt.Scalar"(%20028, %20051) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20053 = "torch.aten.bitwise_not"(%20052) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20054 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%20055 = "torch.aten.gt.Scalar"(%20028, %20054) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20056 = "torch.aten.bitwise_not"(%20055) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20057 = "torch.aten.mul.Tensor"(%20053, %20056) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20058 = "torch.aten.where.self"(%20057, %20050, %20033) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20059 = "torch.prim.ListConstruct"(%20058, %20058) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%20060 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%20061 = "torch.aten.cat"(%20059, %20060) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%20062 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20063 = "torch.prims.convert_element_type"(%20008, %20062) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%20064 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20065 = "torch.prims.convert_element_type"(%20061, %20064) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%20066 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%20067 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20068 = "torch.prim.ListConstruct"(%20066, %20067) : (!torch.int, !torch.int) -> !torch.list<int>
%20069 = "torch.aten.view"(%20063, %20068) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%20070 = "torch.aten.mul.Tensor"(%20069, %20065) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%20071 = "torch.aten.cos"(%20070) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%20072 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20073 = "torch.prims.convert_element_type"(%20071, %20072) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%20074 = "torch.aten.sin"(%20070) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%20075 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20076 = "torch.prims.convert_element_type"(%20074, %20075) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%20077 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20078 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20079 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20080 = "torch.aten.slice.Tensor"(%20073, %20077, %20078, %18481, %20079) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20080, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20081 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20082 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20083 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20084 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20085 = "torch.aten.slice.Tensor"(%20080, %20081, %20082, %20083, %20084) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20085, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20086 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20087 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20088 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20089 = "torch.aten.slice.Tensor"(%20076, %20086, %20087, %18481, %20088) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20089, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20090 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20091 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20092 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20093 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20094 = "torch.aten.slice.Tensor"(%20089, %20090, %20091, %20092, %20093) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20094, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20095 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20096 = "torch.aten.unsqueeze"(%20085, %20095) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20096, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20097 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20098 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20099 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20100 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20101 = "torch.aten.slice.Tensor"(%20096, %20097, %20098, %20099, %20100) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20101, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20102 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20103 = "torch.aten.unsqueeze"(%20101, %20102) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20103, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20104 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20105 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20106 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20107 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20108 = "torch.aten.slice.Tensor"(%20103, %20104, %20105, %20106, %20107) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20108, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20109 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20110 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20111 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20112 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20113 = "torch.prim.ListConstruct"(%20109, %20110, %20111, %20112) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20114 = "torch.aten.repeat"(%20108, %20113) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20114, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%20115 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20116 = "torch.aten.unsqueeze"(%20094, %20115) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20116, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20117 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20118 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20119 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20120 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20121 = "torch.aten.slice.Tensor"(%20116, %20117, %20118, %20119, %20120) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20121, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20122 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20123 = "torch.aten.unsqueeze"(%20121, %20122) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20123, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20124 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20125 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20126 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20127 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20128 = "torch.aten.slice.Tensor"(%20123, %20124, %20125, %20126, %20127) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20128, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20129 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20130 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20131 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20132 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20133 = "torch.prim.ListConstruct"(%20129, %20130, %20131, %20132) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20134 = "torch.aten.repeat"(%20128, %20133) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20134, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%20135 = "torch.aten.mul.Tensor"(%19992, %20114) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20135, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20136 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20137 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20138 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%20139 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20140 = "torch.aten.slice.Tensor"(%19992, %20136, %20137, %20138, %20139) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20140, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%20141 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20142 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%20143 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20144 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20145 = "torch.aten.slice.Tensor"(%19992, %20141, %20142, %20143, %20144) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20145, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%20146 = "torch.aten.neg"(%20145) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20146, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%20147 = "torch.prim.ListConstruct"(%20146, %20140) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%20148 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%20149 = "torch.aten.cat"(%20147, %20148) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20149, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20150 = "torch.aten.mul.Tensor"(%20149, %20134) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20150, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20151 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20152 = "torch.aten.add.Tensor"(%20135, %20150, %20151) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20152, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20153 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%20154 = "torch.constant.none"() : () -> !torch.none
%20155 = "torch.constant.none"() : () -> !torch.none
%20156 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%20157 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20158 = "torch.aten.arange"(%20153, %20154, %20155, %20156, %20157) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%20159 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20160 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20161 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20162 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20163 = "torch.constant.none"() : () -> !torch.none
%20164 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%20165 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20166 = "torch.aten.arange.start_step"(%20159, %20160, %20161, %20162, %20163, %20164, %20165) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%20167 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20168 = "torch.prims.convert_element_type"(%20166, %20167) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%20169 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20170 = "torch.aten.div.Scalar"(%20168, %20169) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20171 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%20172 = "torch.aten.pow.Scalar"(%20171, %20170) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20173 = "torch.aten.reciprocal"(%20172) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20174 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%20175 = "torch.aten.mul.Scalar"(%20173, %20174) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%20176 = "torch.aten.reciprocal"(%20175) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20177 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%20178 = "torch.aten.mul.Scalar"(%20176, %20177) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%20179 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%20180 = "torch.aten.gt.Scalar"(%20178, %20179) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20181 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20182 = "torch.aten.div.Scalar"(%20175, %20181) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20183 = "torch.aten.where.self"(%20180, %20182, %20175) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20184 = "torch.aten.reciprocal"(%20178) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20185 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%20186 = "torch.aten.mul.Scalar"(%20184, %20185) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20187 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20188 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20189 = "torch.aten.sub.Scalar"(%20186, %20187, %20188) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%20190 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20191 = "torch.aten.div.Scalar"(%20189, %20190) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20192 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20193 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20194 = "torch.aten.rsub.Scalar"(%20191, %20192, %20193) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%20195 = "torch.aten.mul.Tensor"(%20194, %20183) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20196 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20197 = "torch.aten.div.Scalar"(%20195, %20196) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20198 = "torch.aten.mul.Tensor"(%20191, %20183) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20199 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20200 = "torch.aten.add.Tensor"(%20197, %20198, %20199) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20201 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%20202 = "torch.aten.lt.Scalar"(%20178, %20201) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20203 = "torch.aten.bitwise_not"(%20202) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20204 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%20205 = "torch.aten.gt.Scalar"(%20178, %20204) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20206 = "torch.aten.bitwise_not"(%20205) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20207 = "torch.aten.mul.Tensor"(%20203, %20206) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20208 = "torch.aten.where.self"(%20207, %20200, %20183) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20209 = "torch.prim.ListConstruct"(%20208, %20208) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%20210 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%20211 = "torch.aten.cat"(%20209, %20210) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%20212 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20213 = "torch.prims.convert_element_type"(%20158, %20212) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%20214 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20215 = "torch.prims.convert_element_type"(%20211, %20214) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%20216 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%20217 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20218 = "torch.prim.ListConstruct"(%20216, %20217) : (!torch.int, !torch.int) -> !torch.list<int>
%20219 = "torch.aten.view"(%20213, %20218) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%20220 = "torch.aten.mul.Tensor"(%20219, %20215) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%20221 = "torch.aten.cos"(%20220) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%20222 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20223 = "torch.prims.convert_element_type"(%20221, %20222) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%20224 = "torch.aten.sin"(%20220) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%20225 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20226 = "torch.prims.convert_element_type"(%20224, %20225) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%20227 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20228 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20229 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20230 = "torch.aten.slice.Tensor"(%20223, %20227, %20228, %18481, %20229) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20230, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20231 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20232 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20233 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20234 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20235 = "torch.aten.slice.Tensor"(%20230, %20231, %20232, %20233, %20234) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20235, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20236 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20237 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20238 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20239 = "torch.aten.slice.Tensor"(%20226, %20236, %20237, %18481, %20238) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20239, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20240 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20241 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20242 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20243 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20244 = "torch.aten.slice.Tensor"(%20239, %20240, %20241, %20242, %20243) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20244, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20245 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20246 = "torch.aten.unsqueeze"(%20235, %20245) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20246, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20247 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20248 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20249 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20250 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20251 = "torch.aten.slice.Tensor"(%20246, %20247, %20248, %20249, %20250) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20251, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20252 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20253 = "torch.aten.unsqueeze"(%20251, %20252) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20253, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20254 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20255 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20256 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20257 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20258 = "torch.aten.slice.Tensor"(%20253, %20254, %20255, %20256, %20257) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20258, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20259 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20260 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20261 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20262 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20263 = "torch.prim.ListConstruct"(%20259, %20260, %20261, %20262) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20264 = "torch.aten.repeat"(%20258, %20263) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20264, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%20265 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20266 = "torch.aten.unsqueeze"(%20244, %20265) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20266, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20267 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20268 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20269 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20270 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20271 = "torch.aten.slice.Tensor"(%20266, %20267, %20268, %20269, %20270) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20271, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20272 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20273 = "torch.aten.unsqueeze"(%20271, %20272) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20273, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20274 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20275 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20276 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20277 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20278 = "torch.aten.slice.Tensor"(%20273, %20274, %20275, %20276, %20277) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20278, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20279 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20280 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20281 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20282 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20283 = "torch.prim.ListConstruct"(%20279, %20280, %20281, %20282) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20284 = "torch.aten.repeat"(%20278, %20283) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20284, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%20285 = "torch.aten.mul.Tensor"(%19997, %20264) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20285, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20286 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20287 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20288 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%20289 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20290 = "torch.aten.slice.Tensor"(%19997, %20286, %20287, %20288, %20289) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20290, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%20291 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20292 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%20293 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20294 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20295 = "torch.aten.slice.Tensor"(%19997, %20291, %20292, %20293, %20294) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20295, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%20296 = "torch.aten.neg"(%20295) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20296, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%20297 = "torch.prim.ListConstruct"(%20296, %20290) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%20298 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%20299 = "torch.aten.cat"(%20297, %20298) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20299, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20300 = "torch.aten.mul.Tensor"(%20299, %20284) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20300, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20301 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20302 = "torch.aten.add.Tensor"(%20285, %20300, %20301) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20302, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20303 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%20304 = "torch.aten.mul.Scalar"(%arg69, %20303) : (!torch.vtensor<[4,?],si64>, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%20304, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%20305 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20306 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20307 = "torch.aten.add.Scalar"(%20304, %20305, %20306) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%20307, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%20308 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20309 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20310 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20311 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20312 = "torch.prim.ListConstruct"(%20308, %18477, %20309, %20310, %20311) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20313 = "torch.aten.view"(%20302, %20312) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20313, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20314 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20315 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20316 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20317 = "torch.prim.ListConstruct"(%19011, %20314, %20315, %20316) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20318 = "torch.aten.view"(%20313, %20317) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20318, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20319 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%20320 = "torch.aten.view"(%20307, %20319) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%20320, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%20321 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20322 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20323 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20324 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20325 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20326 = "torch.prim.ListConstruct"(%18479, %20321, %20322, %20323, %20324, %20325) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20327 = "torch.aten.view"(%19729, %20326) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20327, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20328 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20329 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20330 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20331 = "torch.prim.ListConstruct"(%18993, %20328, %20329, %20330) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20332 = "torch.aten.view"(%20327, %20331) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20332, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20333 = "torch.prim.ListConstruct"(%20320) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%20334 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20335 = "torch.aten.index_put"(%20332, %20333, %20318, %20334) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20335, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20336 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20337 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20338 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20339 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20340 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20341 = "torch.prim.ListConstruct"(%18479, %20336, %20337, %20338, %20339, %20340) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20342 = "torch.aten.view"(%20335, %20341) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20342, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20343 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%20344 = "torch.prim.ListConstruct"(%18479, %20343) : (!torch.int, !torch.int) -> !torch.list<int>
%20345 = "torch.aten.view"(%20342, %20344) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20345, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%20346 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20347 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20348 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20349 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20350 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20351 = "torch.prim.ListConstruct"(%18479, %20346, %20347, %20348, %20349, %20350) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20352 = "torch.aten.view"(%20345, %20351) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20352, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20353 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20354 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20355 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20356 = "torch.prim.ListConstruct"(%18993, %20353, %20354, %20355) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20357 = "torch.aten.view"(%20352, %20356) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20357, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20358 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20359 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20360 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20361 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20362 = "torch.prim.ListConstruct"(%20358, %18477, %20359, %20360, %20361) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20363 = "torch.aten.view"(%20002, %20362) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20363, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20364 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20365 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20366 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20367 = "torch.prim.ListConstruct"(%19011, %20364, %20365, %20366) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20368 = "torch.aten.view"(%20363, %20367) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20368, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20369 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20370 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20371 = "torch.aten.add.Scalar"(%20307, %20369, %20370) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%20371, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%20372 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%20373 = "torch.aten.view"(%20371, %20372) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%20373, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%20374 = "torch.prim.ListConstruct"(%20373) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%20375 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20376 = "torch.aten.index_put"(%20357, %20374, %20368, %20375) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20376, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20377 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20378 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20379 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20380 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20381 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20382 = "torch.prim.ListConstruct"(%18479, %20377, %20378, %20379, %20380, %20381) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20383 = "torch.aten.view"(%20376, %20382) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20383, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20384 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%20385 = "torch.prim.ListConstruct"(%18479, %20384) : (!torch.int, !torch.int) -> !torch.list<int>
%20386 = "torch.aten.view"(%20383, %20385) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20386, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%20387 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%20388 = "torch.aten.unsqueeze"(%20302, %20387) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20388, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%20389 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20390 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20391 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20392 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20393 = "torch.prim.ListConstruct"(%20389, %18481, %20390, %20391, %20392) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20394 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20395 = "torch.aten.expand"(%20388, %20393, %20394) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20395, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%20396 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20397 = "torch.aten.clone"(%20395, %20396) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20397, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%20398 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20399 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20400 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20401 = "torch.prim.ListConstruct"(%20398, %18481, %20399, %20400) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20402 = "torch.aten._unsafe_view"(%20397, %20401) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20402, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20403 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%20404 = "torch.aten.unsqueeze"(%20002, %20403) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20404, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%20405 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20406 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20407 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20408 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20409 = "torch.prim.ListConstruct"(%20405, %18481, %20406, %20407, %20408) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20410 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20411 = "torch.aten.expand"(%20404, %20409, %20410) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20411, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%20412 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20413 = "torch.aten.clone"(%20411, %20412) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20413, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%20414 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20415 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20416 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20417 = "torch.prim.ListConstruct"(%20414, %18481, %20415, %20416) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20418 = "torch.aten._unsafe_view"(%20413, %20417) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20418, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20419 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20420 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20421 = "torch.aten.transpose.int"(%20152, %20419, %20420) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20421, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%20422 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20423 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20424 = "torch.aten.transpose.int"(%20402, %20422, %20423) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20424, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%20425 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20426 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20427 = "torch.aten.transpose.int"(%20418, %20425, %20426) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20427, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%20428 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20429 = "torch.aten.squeeze.dim"(%18570, %20428) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20429, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%20430 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20431 = "torch.aten.squeeze.dim"(%20429, %20430) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20431, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%20432 = "torch_c.to_builtin_tensor"(%20421) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%20433 = "torch_c.to_builtin_tensor"(%20424) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%20434 = "torch_c.to_builtin_tensor"(%20427) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%20435 = "torch_c.to_builtin_tensor"(%20431) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>) -> tensor<4x1x?x?xf8E4M3FNUZ>
%20436 = "tensor.cast"(%20435) : (tensor<4x1x?x?xf8E4M3FNUZ>) -> tensor<?x?x?x?xf8E4M3FNUZ>
%20437 = "torch_c.to_builtin_tensor"(%17289) : (!torch.vtensor<[],f32>) -> tensor<f32>
%20438 = "util.call"(%20432, %20433, %20434, %20437, %20436) <{callee = @sharktank_masked_flash_attention_4_32_128_128_f8E4M3FNUZ_f32_f32}> : (tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<f32>, tensor<?x?x?x?xf8E4M3FNUZ>) -> tensor<4x32x?x128xf32>
%20439 = "torch_c.from_builtin_tensor"(%20438) : (tensor<4x32x?x128xf32>) -> !torch.vtensor<[4,32,?,128],f32>
"torch.bind_symbolic_shape"(%20439, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f32>, !torch.int) -> ()
%20440 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20441 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20442 = "torch.aten.transpose.int"(%20439, %20440, %20441) : (!torch.vtensor<[4,32,?,128],f32>, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%20442, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%20443 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20444 = "torch.aten.clone"(%20442, %20443) : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%20444, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%20445 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20446 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%20447 = "torch.prim.ListConstruct"(%20445, %18481, %20446) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20448 = "torch.aten._unsafe_view"(%20444, %20447) : (!torch.vtensor<[4,?,32,128],f32>, !torch.list<int>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20448, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20449 = "torch.aten.div.Tensor"(%20448, %17291) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20449, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20450 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%20451 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%20452 = "torch.aten.clamp"(%20449, %20450, %20451) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20452, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20453 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%20454 = "torch.prims.convert_element_type"(%20452, %20453) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20454, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%20455 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20456 = "torch.aten.unsqueeze"(%17293, %20455) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%20457 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20458 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%20459 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%20460 = "torch.prim.ListConstruct"(%20457, %20458, %20459) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20461 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20462 = "torch.aten.expand"(%20456, %20460, %20461) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%20463 = "torch_c.to_builtin_tensor"(%20454) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%20464 = "torch_c.to_builtin_tensor"(%20462) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%20465 = "util.call"(%20463, %20464) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%20466 = "torch_c.from_builtin_tensor"(%20465) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20466, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20467 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20468 = "torch.prims.convert_element_type"(%20466, %20467) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20468, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20469 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20470 = "torch.aten.add.Tensor"(%19896, %20468, %20469) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20470, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20471 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20472 = "torch.prims.convert_element_type"(%20470, %20471) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20472, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20473 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20474 = "torch.aten.pow.Tensor_Scalar"(%20472, %20473) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20474, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20475 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%20476 = "torch.prim.ListConstruct"(%20475) : (!torch.int) -> !torch.list<int>
%20477 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%20478 = "torch.constant.none"() : () -> !torch.none
%20479 = "torch.aten.mean.dim"(%20474, %20476, %20477, %20478) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%20479, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%20480 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%20481 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20482 = "torch.aten.add.Scalar"(%20479, %20480, %20481) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%20482, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%20483 = "torch.aten.rsqrt"(%20482) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%20483, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%20484 = "torch.aten.mul.Tensor"(%20472, %20483) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20484, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20485 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20486 = "torch.prims.convert_element_type"(%20484, %20485) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20486, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20487 = "torch.aten.mul.Tensor"(%17295, %20486) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20487, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20488 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20489 = "torch.prims.convert_element_type"(%20487, %20488) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20489, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20490 = "torch.aten.div.Tensor"(%20489, %17297) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20490, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20491 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%20492 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%20493 = "torch.aten.clamp"(%20490, %20491, %20492) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20493, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20494 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%20495 = "torch.prims.convert_element_type"(%20493, %20494) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20495, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%20496 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20497 = "torch.aten.unsqueeze"(%17299, %20496) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%20498 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20499 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%20500 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%20501 = "torch.prim.ListConstruct"(%20498, %20499, %20500) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20502 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20503 = "torch.aten.expand"(%20497, %20501, %20502) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%20504 = "torch_c.to_builtin_tensor"(%20495) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%20505 = "torch_c.to_builtin_tensor"(%20503) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%20506 = "util.call"(%20504, %20505) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%20507 = "torch_c.from_builtin_tensor"(%20506) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%20507, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%20508 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20509 = "torch.prims.convert_element_type"(%20507, %20508) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%20509, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%20510 = "torch.aten.silu"(%20509) : (!torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%20510, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%20511 = "torch.aten.div.Tensor"(%20489, %17301) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20511, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20512 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%20513 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%20514 = "torch.aten.clamp"(%20511, %20512, %20513) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20514, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20515 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%20516 = "torch.prims.convert_element_type"(%20514, %20515) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20516, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%20517 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20518 = "torch.aten.unsqueeze"(%17303, %20517) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%20519 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20520 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%20521 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%20522 = "torch.prim.ListConstruct"(%20519, %20520, %20521) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20523 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20524 = "torch.aten.expand"(%20518, %20522, %20523) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%20525 = "torch_c.to_builtin_tensor"(%20516) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%20526 = "torch_c.to_builtin_tensor"(%20524) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%20527 = "util.call"(%20525, %20526) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%20528 = "torch_c.from_builtin_tensor"(%20527) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%20528, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%20529 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20530 = "torch.prims.convert_element_type"(%20528, %20529) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%20530, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%20531 = "torch.aten.mul.Tensor"(%20510, %20530) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%20531, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%20532 = "torch.aten.div.Tensor"(%20531, %17305) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%20532, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%20533 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%20534 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%20535 = "torch.aten.clamp"(%20532, %20533, %20534) : (!torch.vtensor<[4,?,14336],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%20535, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%20536 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%20537 = "torch.prims.convert_element_type"(%20535, %20536) : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> !torch.vtensor<[4,?,14336],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20537, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>, !torch.int) -> ()
%20538 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20539 = "torch.aten.unsqueeze"(%17307, %20538) : (!torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,14336],f8E4M3FNUZ>
%20540 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20541 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%20542 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%20543 = "torch.prim.ListConstruct"(%20540, %20541, %20542) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20544 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20545 = "torch.aten.expand"(%20539, %20543, %20544) : (!torch.vtensor<[1,4096,14336],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,14336],f8E4M3FNUZ>
%20546 = "torch_c.to_builtin_tensor"(%20537) : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>) -> tensor<4x?x14336xf8E4M3FNUZ>
%20547 = "torch_c.to_builtin_tensor"(%20545) : (!torch.vtensor<[4,4096,14336],f8E4M3FNUZ>) -> tensor<4x4096x14336xf8E4M3FNUZ>
%20548 = "util.call"(%20546, %20547) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx14336xf8E4M3FNUZ_R4x4096x14336xf8E4M3FNUZ}> : (tensor<4x?x14336xf8E4M3FNUZ>, tensor<4x4096x14336xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%20549 = "torch_c.from_builtin_tensor"(%20548) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20549, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20550 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20551 = "torch.prims.convert_element_type"(%20549, %20550) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20551, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20552 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20553 = "torch.aten.add.Tensor"(%20470, %20551, %20552) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20553, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20554 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20555 = "torch.prims.convert_element_type"(%20553, %20554) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20555, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20556 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20557 = "torch.aten.pow.Tensor_Scalar"(%20555, %20556) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20557, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20558 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%20559 = "torch.prim.ListConstruct"(%20558) : (!torch.int) -> !torch.list<int>
%20560 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%20561 = "torch.constant.none"() : () -> !torch.none
%20562 = "torch.aten.mean.dim"(%20557, %20559, %20560, %20561) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%20562, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%20563 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%20564 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20565 = "torch.aten.add.Scalar"(%20562, %20563, %20564) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%20565, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%20566 = "torch.aten.rsqrt"(%20565) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%20566, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%20567 = "torch.aten.mul.Tensor"(%20555, %20566) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20567, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20568 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20569 = "torch.prims.convert_element_type"(%20567, %20568) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20569, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20570 = "torch.aten.mul.Tensor"(%17309, %20569) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20570, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20571 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20572 = "torch.prims.convert_element_type"(%20570, %20571) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20572, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20573 = "torch.aten.div.Tensor"(%20572, %17311) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20573, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20574 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%20575 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%20576 = "torch.aten.clamp"(%20573, %20574, %20575) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20576, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20577 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%20578 = "torch.prims.convert_element_type"(%20576, %20577) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20578, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%20579 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20580 = "torch.aten.unsqueeze"(%17313, %20579) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%20581 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20582 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%20583 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%20584 = "torch.prim.ListConstruct"(%20581, %20582, %20583) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20585 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20586 = "torch.aten.expand"(%20580, %20584, %20585) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%20587 = "torch_c.to_builtin_tensor"(%20578) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%20588 = "torch_c.to_builtin_tensor"(%20586) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%20589 = "util.call"(%20587, %20588) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%20590 = "torch_c.from_builtin_tensor"(%20589) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20590, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20591 = "torch.aten.div.Tensor"(%20590, %17315) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20591, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20592 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%20593 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%20594 = "torch.aten.clamp"(%20591, %20592, %20593) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%20594, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%20595 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%20596 = "torch.prims.convert_element_type"(%20594, %20595) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20596, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%20597 = "torch.aten.div.Tensor"(%20572, %17317) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20597, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20598 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%20599 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%20600 = "torch.aten.clamp"(%20597, %20598, %20599) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20600, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20601 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%20602 = "torch.prims.convert_element_type"(%20600, %20601) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20602, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%20603 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20604 = "torch.aten.unsqueeze"(%17319, %20603) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%20605 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20606 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%20607 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%20608 = "torch.prim.ListConstruct"(%20605, %20606, %20607) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20609 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20610 = "torch.aten.expand"(%20604, %20608, %20609) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%20611 = "torch_c.to_builtin_tensor"(%20602) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%20612 = "torch_c.to_builtin_tensor"(%20610) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%20613 = "util.call"(%20611, %20612) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%20614 = "torch_c.from_builtin_tensor"(%20613) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%20614, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%20615 = "torch.aten.div.Tensor"(%20614, %17321) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%20615, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%20616 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%20617 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%20618 = "torch.aten.clamp"(%20615, %20616, %20617) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%20618, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%20619 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%20620 = "torch.prims.convert_element_type"(%20618, %20619) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20620, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%20621 = "torch.aten.div.Tensor"(%20572, %17323) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20621, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20622 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%20623 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%20624 = "torch.aten.clamp"(%20621, %20622, %20623) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%20624, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%20625 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%20626 = "torch.prims.convert_element_type"(%20624, %20625) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20626, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%20627 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20628 = "torch.aten.unsqueeze"(%17325, %20627) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%20629 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20630 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%20631 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%20632 = "torch.prim.ListConstruct"(%20629, %20630, %20631) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20633 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20634 = "torch.aten.expand"(%20628, %20632, %20633) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%20635 = "torch_c.to_builtin_tensor"(%20626) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%20636 = "torch_c.to_builtin_tensor"(%20634) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%20637 = "util.call"(%20635, %20636) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%20638 = "torch_c.from_builtin_tensor"(%20637) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%20638, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%20639 = "torch.aten.div.Tensor"(%20638, %17327) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%20639, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%20640 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%20641 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%20642 = "torch.aten.clamp"(%20639, %20640, %20641) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%20642, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%20643 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%20644 = "torch.prims.convert_element_type"(%20642, %20643) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20644, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%20645 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20646 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20647 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20648 = "torch.prim.ListConstruct"(%20645, %18481, %20646, %20647) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20649 = "torch.aten.view"(%20596, %20648) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20649, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20650 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20651 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20652 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20653 = "torch.prim.ListConstruct"(%20650, %18481, %20651, %20652) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20654 = "torch.aten.view"(%20620, %20653) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20654, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20655 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20656 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20657 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20658 = "torch.prim.ListConstruct"(%20655, %18481, %20656, %20657) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20659 = "torch.aten.view"(%20644, %20658) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20659, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20660 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%20661 = "torch.constant.none"() : () -> !torch.none
%20662 = "torch.constant.none"() : () -> !torch.none
%20663 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%20664 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20665 = "torch.aten.arange"(%20660, %20661, %20662, %20663, %20664) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%20666 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20667 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20668 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20669 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20670 = "torch.constant.none"() : () -> !torch.none
%20671 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%20672 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20673 = "torch.aten.arange.start_step"(%20666, %20667, %20668, %20669, %20670, %20671, %20672) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%20674 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20675 = "torch.prims.convert_element_type"(%20673, %20674) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%20676 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20677 = "torch.aten.div.Scalar"(%20675, %20676) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20678 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%20679 = "torch.aten.pow.Scalar"(%20678, %20677) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20680 = "torch.aten.reciprocal"(%20679) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20681 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%20682 = "torch.aten.mul.Scalar"(%20680, %20681) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%20683 = "torch.aten.reciprocal"(%20682) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20684 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%20685 = "torch.aten.mul.Scalar"(%20683, %20684) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%20686 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%20687 = "torch.aten.gt.Scalar"(%20685, %20686) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20688 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20689 = "torch.aten.div.Scalar"(%20682, %20688) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20690 = "torch.aten.where.self"(%20687, %20689, %20682) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20691 = "torch.aten.reciprocal"(%20685) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20692 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%20693 = "torch.aten.mul.Scalar"(%20691, %20692) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20694 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20695 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20696 = "torch.aten.sub.Scalar"(%20693, %20694, %20695) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%20697 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20698 = "torch.aten.div.Scalar"(%20696, %20697) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20699 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20700 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20701 = "torch.aten.rsub.Scalar"(%20698, %20699, %20700) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%20702 = "torch.aten.mul.Tensor"(%20701, %20690) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20703 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20704 = "torch.aten.div.Scalar"(%20702, %20703) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20705 = "torch.aten.mul.Tensor"(%20698, %20690) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20706 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20707 = "torch.aten.add.Tensor"(%20704, %20705, %20706) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20708 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%20709 = "torch.aten.lt.Scalar"(%20685, %20708) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20710 = "torch.aten.bitwise_not"(%20709) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20711 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%20712 = "torch.aten.gt.Scalar"(%20685, %20711) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20713 = "torch.aten.bitwise_not"(%20712) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20714 = "torch.aten.mul.Tensor"(%20710, %20713) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20715 = "torch.aten.where.self"(%20714, %20707, %20690) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20716 = "torch.prim.ListConstruct"(%20715, %20715) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%20717 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%20718 = "torch.aten.cat"(%20716, %20717) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%20719 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20720 = "torch.prims.convert_element_type"(%20665, %20719) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%20721 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20722 = "torch.prims.convert_element_type"(%20718, %20721) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%20723 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%20724 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20725 = "torch.prim.ListConstruct"(%20723, %20724) : (!torch.int, !torch.int) -> !torch.list<int>
%20726 = "torch.aten.view"(%20720, %20725) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%20727 = "torch.aten.mul.Tensor"(%20726, %20722) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%20728 = "torch.aten.cos"(%20727) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%20729 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20730 = "torch.prims.convert_element_type"(%20728, %20729) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%20731 = "torch.aten.sin"(%20727) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%20732 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20733 = "torch.prims.convert_element_type"(%20731, %20732) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%20734 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20735 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20736 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20737 = "torch.aten.slice.Tensor"(%20730, %20734, %20735, %18481, %20736) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20737, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20738 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20739 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20740 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20741 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20742 = "torch.aten.slice.Tensor"(%20737, %20738, %20739, %20740, %20741) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20742, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20743 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20744 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20745 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20746 = "torch.aten.slice.Tensor"(%20733, %20743, %20744, %18481, %20745) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20746, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20747 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20748 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20749 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20750 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20751 = "torch.aten.slice.Tensor"(%20746, %20747, %20748, %20749, %20750) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20751, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20752 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20753 = "torch.aten.unsqueeze"(%20742, %20752) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20753, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20754 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20755 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20756 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20757 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20758 = "torch.aten.slice.Tensor"(%20753, %20754, %20755, %20756, %20757) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20758, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20759 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20760 = "torch.aten.unsqueeze"(%20758, %20759) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20760, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20761 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20762 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20763 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20764 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20765 = "torch.aten.slice.Tensor"(%20760, %20761, %20762, %20763, %20764) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20765, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20766 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20767 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20768 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20769 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20770 = "torch.prim.ListConstruct"(%20766, %20767, %20768, %20769) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20771 = "torch.aten.repeat"(%20765, %20770) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20771, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%20772 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20773 = "torch.aten.unsqueeze"(%20751, %20772) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20773, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20774 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20775 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20776 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20777 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20778 = "torch.aten.slice.Tensor"(%20773, %20774, %20775, %20776, %20777) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20778, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20779 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20780 = "torch.aten.unsqueeze"(%20778, %20779) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20780, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20781 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20782 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20783 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20784 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20785 = "torch.aten.slice.Tensor"(%20780, %20781, %20782, %20783, %20784) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20785, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20786 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20787 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20788 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20789 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20790 = "torch.prim.ListConstruct"(%20786, %20787, %20788, %20789) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20791 = "torch.aten.repeat"(%20785, %20790) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20791, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%20792 = "torch.aten.mul.Tensor"(%20649, %20771) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20792, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20793 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20794 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20795 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%20796 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20797 = "torch.aten.slice.Tensor"(%20649, %20793, %20794, %20795, %20796) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20797, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%20798 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20799 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%20800 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20801 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20802 = "torch.aten.slice.Tensor"(%20649, %20798, %20799, %20800, %20801) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20802, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%20803 = "torch.aten.neg"(%20802) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20803, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%20804 = "torch.prim.ListConstruct"(%20803, %20797) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%20805 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%20806 = "torch.aten.cat"(%20804, %20805) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20806, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20807 = "torch.aten.mul.Tensor"(%20806, %20791) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20807, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20808 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20809 = "torch.aten.add.Tensor"(%20792, %20807, %20808) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20809, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%20810 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%20811 = "torch.constant.none"() : () -> !torch.none
%20812 = "torch.constant.none"() : () -> !torch.none
%20813 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%20814 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20815 = "torch.aten.arange"(%20810, %20811, %20812, %20813, %20814) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%20816 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20817 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20818 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20819 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20820 = "torch.constant.none"() : () -> !torch.none
%20821 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%20822 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20823 = "torch.aten.arange.start_step"(%20816, %20817, %20818, %20819, %20820, %20821, %20822) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%20824 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20825 = "torch.prims.convert_element_type"(%20823, %20824) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%20826 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20827 = "torch.aten.div.Scalar"(%20825, %20826) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20828 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%20829 = "torch.aten.pow.Scalar"(%20828, %20827) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20830 = "torch.aten.reciprocal"(%20829) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20831 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%20832 = "torch.aten.mul.Scalar"(%20830, %20831) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%20833 = "torch.aten.reciprocal"(%20832) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20834 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%20835 = "torch.aten.mul.Scalar"(%20833, %20834) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%20836 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%20837 = "torch.aten.gt.Scalar"(%20835, %20836) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20838 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20839 = "torch.aten.div.Scalar"(%20832, %20838) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20840 = "torch.aten.where.self"(%20837, %20839, %20832) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20841 = "torch.aten.reciprocal"(%20835) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20842 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%20843 = "torch.aten.mul.Scalar"(%20841, %20842) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20844 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20845 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20846 = "torch.aten.sub.Scalar"(%20843, %20844, %20845) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%20847 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20848 = "torch.aten.div.Scalar"(%20846, %20847) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20849 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20850 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20851 = "torch.aten.rsub.Scalar"(%20848, %20849, %20850) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%20852 = "torch.aten.mul.Tensor"(%20851, %20840) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20853 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20854 = "torch.aten.div.Scalar"(%20852, %20853) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20855 = "torch.aten.mul.Tensor"(%20848, %20840) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20856 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20857 = "torch.aten.add.Tensor"(%20854, %20855, %20856) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%20858 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%20859 = "torch.aten.lt.Scalar"(%20835, %20858) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20860 = "torch.aten.bitwise_not"(%20859) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20861 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%20862 = "torch.aten.gt.Scalar"(%20835, %20861) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%20863 = "torch.aten.bitwise_not"(%20862) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20864 = "torch.aten.mul.Tensor"(%20860, %20863) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%20865 = "torch.aten.where.self"(%20864, %20857, %20840) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%20866 = "torch.prim.ListConstruct"(%20865, %20865) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%20867 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%20868 = "torch.aten.cat"(%20866, %20867) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%20869 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20870 = "torch.prims.convert_element_type"(%20815, %20869) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%20871 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20872 = "torch.prims.convert_element_type"(%20868, %20871) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%20873 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%20874 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20875 = "torch.prim.ListConstruct"(%20873, %20874) : (!torch.int, !torch.int) -> !torch.list<int>
%20876 = "torch.aten.view"(%20870, %20875) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%20877 = "torch.aten.mul.Tensor"(%20876, %20872) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%20878 = "torch.aten.cos"(%20877) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%20879 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20880 = "torch.prims.convert_element_type"(%20878, %20879) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%20881 = "torch.aten.sin"(%20877) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%20882 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%20883 = "torch.prims.convert_element_type"(%20881, %20882) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%20884 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20885 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20886 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20887 = "torch.aten.slice.Tensor"(%20880, %20884, %20885, %18481, %20886) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20887, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20888 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20889 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20890 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20891 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20892 = "torch.aten.slice.Tensor"(%20887, %20888, %20889, %20890, %20891) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20892, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20893 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20894 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20895 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20896 = "torch.aten.slice.Tensor"(%20883, %20893, %20894, %18481, %20895) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20896, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20897 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20898 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20899 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20900 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20901 = "torch.aten.slice.Tensor"(%20896, %20897, %20898, %20899, %20900) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%20901, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%20902 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20903 = "torch.aten.unsqueeze"(%20892, %20902) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20903, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20904 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20905 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20906 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20907 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20908 = "torch.aten.slice.Tensor"(%20903, %20904, %20905, %20906, %20907) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20908, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20909 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20910 = "torch.aten.unsqueeze"(%20908, %20909) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20910, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20911 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20912 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20913 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20914 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20915 = "torch.aten.slice.Tensor"(%20910, %20911, %20912, %20913, %20914) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20915, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20916 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20917 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20918 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20919 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20920 = "torch.prim.ListConstruct"(%20916, %20917, %20918, %20919) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20921 = "torch.aten.repeat"(%20915, %20920) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20921, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%20922 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20923 = "torch.aten.unsqueeze"(%20901, %20922) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20923, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20924 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20925 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20926 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20927 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20928 = "torch.aten.slice.Tensor"(%20923, %20924, %20925, %20926, %20927) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%20928, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%20929 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20930 = "torch.aten.unsqueeze"(%20928, %20929) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20930, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20931 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20932 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20933 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20934 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20935 = "torch.aten.slice.Tensor"(%20930, %20931, %20932, %20933, %20934) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20935, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%20936 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20937 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20938 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20939 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20940 = "torch.prim.ListConstruct"(%20936, %20937, %20938, %20939) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20941 = "torch.aten.repeat"(%20935, %20940) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%20941, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%20942 = "torch.aten.mul.Tensor"(%20654, %20921) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20942, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20943 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20944 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%20945 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%20946 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20947 = "torch.aten.slice.Tensor"(%20654, %20943, %20944, %20945, %20946) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20947, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%20948 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%20949 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%20950 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%20951 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20952 = "torch.aten.slice.Tensor"(%20654, %20948, %20949, %20950, %20951) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20952, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%20953 = "torch.aten.neg"(%20952) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20953, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%20954 = "torch.prim.ListConstruct"(%20953, %20947) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%20955 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%20956 = "torch.aten.cat"(%20954, %20955) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20956, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20957 = "torch.aten.mul.Tensor"(%20956, %20941) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20957, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20958 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20959 = "torch.aten.add.Tensor"(%20942, %20957, %20958) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20959, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20960 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%20961 = "torch.aten.mul.Scalar"(%arg69, %20960) : (!torch.vtensor<[4,?],si64>, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%20961, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%20962 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%20963 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%20964 = "torch.aten.add.Scalar"(%20961, %20962, %20963) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%20964, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%20965 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%20966 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20967 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20968 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20969 = "torch.prim.ListConstruct"(%20965, %18477, %20966, %20967, %20968) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20970 = "torch.aten.view"(%20959, %20969) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20970, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20971 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20972 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20973 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20974 = "torch.prim.ListConstruct"(%19011, %20971, %20972, %20973) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20975 = "torch.aten.view"(%20970, %20974) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20975, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20976 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%20977 = "torch.aten.view"(%20964, %20976) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%20977, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%20978 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20979 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20980 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20981 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20982 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20983 = "torch.prim.ListConstruct"(%18479, %20978, %20979, %20980, %20981, %20982) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20984 = "torch.aten.view"(%20386, %20983) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20984, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20985 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20986 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20987 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20988 = "torch.prim.ListConstruct"(%18993, %20985, %20986, %20987) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20989 = "torch.aten.view"(%20984, %20988) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20989, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20990 = "torch.prim.ListConstruct"(%20977) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%20991 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%20992 = "torch.aten.index_put"(%20989, %20990, %20975, %20991) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20992, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%20993 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20994 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%20995 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%20996 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%20997 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%20998 = "torch.prim.ListConstruct"(%18479, %20993, %20994, %20995, %20996, %20997) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%20999 = "torch.aten.view"(%20992, %20998) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%20999, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21000 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%21001 = "torch.prim.ListConstruct"(%18479, %21000) : (!torch.int, !torch.int) -> !torch.list<int>
%21002 = "torch.aten.view"(%20999, %21001) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21002, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%21003 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21004 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21005 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21006 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21007 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21008 = "torch.prim.ListConstruct"(%18479, %21003, %21004, %21005, %21006, %21007) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21009 = "torch.aten.view"(%21002, %21008) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21009, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21010 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21011 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21012 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21013 = "torch.prim.ListConstruct"(%18993, %21010, %21011, %21012) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21014 = "torch.aten.view"(%21009, %21013) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21014, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21015 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21016 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21017 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21018 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21019 = "torch.prim.ListConstruct"(%21015, %18477, %21016, %21017, %21018) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21020 = "torch.aten.view"(%20659, %21019) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21020, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21021 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21022 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21023 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21024 = "torch.prim.ListConstruct"(%19011, %21021, %21022, %21023) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21025 = "torch.aten.view"(%21020, %21024) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21025, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21026 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21027 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21028 = "torch.aten.add.Scalar"(%20964, %21026, %21027) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%21028, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%21029 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%21030 = "torch.aten.view"(%21028, %21029) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%21030, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%21031 = "torch.prim.ListConstruct"(%21030) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%21032 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21033 = "torch.aten.index_put"(%21014, %21031, %21025, %21032) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21033, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21034 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21035 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21036 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21037 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21038 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21039 = "torch.prim.ListConstruct"(%18479, %21034, %21035, %21036, %21037, %21038) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21040 = "torch.aten.view"(%21033, %21039) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21040, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21041 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%21042 = "torch.prim.ListConstruct"(%18479, %21041) : (!torch.int, !torch.int) -> !torch.list<int>
%21043 = "torch.aten.view"(%21040, %21042) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21043, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%21044 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%21045 = "torch.aten.unsqueeze"(%20959, %21044) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21045, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%21046 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21047 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21048 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21049 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21050 = "torch.prim.ListConstruct"(%21046, %18481, %21047, %21048, %21049) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21051 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21052 = "torch.aten.expand"(%21045, %21050, %21051) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21052, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%21053 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21054 = "torch.aten.clone"(%21052, %21053) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21054, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%21055 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21056 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21057 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21058 = "torch.prim.ListConstruct"(%21055, %18481, %21056, %21057) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21059 = "torch.aten._unsafe_view"(%21054, %21058) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21059, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%21060 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%21061 = "torch.aten.unsqueeze"(%20659, %21060) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21061, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%21062 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21063 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21064 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21065 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21066 = "torch.prim.ListConstruct"(%21062, %18481, %21063, %21064, %21065) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21067 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21068 = "torch.aten.expand"(%21061, %21066, %21067) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21068, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%21069 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21070 = "torch.aten.clone"(%21068, %21069) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21070, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%21071 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21072 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21073 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21074 = "torch.prim.ListConstruct"(%21071, %18481, %21072, %21073) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21075 = "torch.aten._unsafe_view"(%21070, %21074) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21075, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%21076 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21077 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21078 = "torch.aten.transpose.int"(%20809, %21076, %21077) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21078, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%21079 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21080 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21081 = "torch.aten.transpose.int"(%21059, %21079, %21080) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21081, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%21082 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21083 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21084 = "torch.aten.transpose.int"(%21075, %21082, %21083) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21084, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%21085 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21086 = "torch.aten.squeeze.dim"(%18570, %21085) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21086, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%21087 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21088 = "torch.aten.squeeze.dim"(%21086, %21087) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21088, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%21089 = "torch_c.to_builtin_tensor"(%21078) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%21090 = "torch_c.to_builtin_tensor"(%21081) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%21091 = "torch_c.to_builtin_tensor"(%21084) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%21092 = "torch_c.to_builtin_tensor"(%21088) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>) -> tensor<4x1x?x?xf8E4M3FNUZ>
%21093 = "tensor.cast"(%21092) : (tensor<4x1x?x?xf8E4M3FNUZ>) -> tensor<?x?x?x?xf8E4M3FNUZ>
%21094 = "torch_c.to_builtin_tensor"(%17329) : (!torch.vtensor<[],f32>) -> tensor<f32>
%21095 = "util.call"(%21089, %21090, %21091, %21094, %21093) <{callee = @sharktank_masked_flash_attention_4_32_128_128_f8E4M3FNUZ_f32_f32}> : (tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<f32>, tensor<?x?x?x?xf8E4M3FNUZ>) -> tensor<4x32x?x128xf32>
%21096 = "torch_c.from_builtin_tensor"(%21095) : (tensor<4x32x?x128xf32>) -> !torch.vtensor<[4,32,?,128],f32>
"torch.bind_symbolic_shape"(%21096, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f32>, !torch.int) -> ()
%21097 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21098 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21099 = "torch.aten.transpose.int"(%21096, %21097, %21098) : (!torch.vtensor<[4,32,?,128],f32>, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%21099, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%21100 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21101 = "torch.aten.clone"(%21099, %21100) : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%21101, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%21102 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21103 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21104 = "torch.prim.ListConstruct"(%21102, %18481, %21103) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21105 = "torch.aten._unsafe_view"(%21101, %21104) : (!torch.vtensor<[4,?,32,128],f32>, !torch.list<int>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21105, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21106 = "torch.aten.div.Tensor"(%21105, %17331) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21106, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21107 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21108 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21109 = "torch.aten.clamp"(%21106, %21107, %21108) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21109, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21110 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21111 = "torch.prims.convert_element_type"(%21109, %21110) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21111, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21112 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21113 = "torch.aten.unsqueeze"(%17333, %21112) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%21114 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21115 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21116 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21117 = "torch.prim.ListConstruct"(%21114, %21115, %21116) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21118 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21119 = "torch.aten.expand"(%21113, %21117, %21118) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%21120 = "torch_c.to_builtin_tensor"(%21111) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21121 = "torch_c.to_builtin_tensor"(%21119) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%21122 = "util.call"(%21120, %21121) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%21123 = "torch_c.from_builtin_tensor"(%21122) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21123, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21124 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21125 = "torch.prims.convert_element_type"(%21123, %21124) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21125, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21126 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21127 = "torch.aten.add.Tensor"(%20553, %21125, %21126) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21127, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21128 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21129 = "torch.prims.convert_element_type"(%21127, %21128) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21129, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21130 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21131 = "torch.aten.pow.Tensor_Scalar"(%21129, %21130) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21131, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21132 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%21133 = "torch.prim.ListConstruct"(%21132) : (!torch.int) -> !torch.list<int>
%21134 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%21135 = "torch.constant.none"() : () -> !torch.none
%21136 = "torch.aten.mean.dim"(%21131, %21133, %21134, %21135) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21136, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21137 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%21138 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21139 = "torch.aten.add.Scalar"(%21136, %21137, %21138) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21139, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21140 = "torch.aten.rsqrt"(%21139) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21140, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21141 = "torch.aten.mul.Tensor"(%21129, %21140) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21141, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21142 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21143 = "torch.prims.convert_element_type"(%21141, %21142) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21143, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21144 = "torch.aten.mul.Tensor"(%17335, %21143) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21144, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21145 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21146 = "torch.prims.convert_element_type"(%21144, %21145) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21146, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21147 = "torch.aten.div.Tensor"(%21146, %17337) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21147, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21148 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21149 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21150 = "torch.aten.clamp"(%21147, %21148, %21149) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21150, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21151 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21152 = "torch.prims.convert_element_type"(%21150, %21151) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21152, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21153 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21154 = "torch.aten.unsqueeze"(%17339, %21153) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%21155 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21156 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%21157 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21158 = "torch.prim.ListConstruct"(%21155, %21156, %21157) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21159 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21160 = "torch.aten.expand"(%21154, %21158, %21159) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%21161 = "torch_c.to_builtin_tensor"(%21152) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21162 = "torch_c.to_builtin_tensor"(%21160) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%21163 = "util.call"(%21161, %21162) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%21164 = "torch_c.from_builtin_tensor"(%21163) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%21164, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%21165 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21166 = "torch.prims.convert_element_type"(%21164, %21165) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21166, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21167 = "torch.aten.silu"(%21166) : (!torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21167, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21168 = "torch.aten.div.Tensor"(%21146, %17341) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21168, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21169 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21170 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21171 = "torch.aten.clamp"(%21168, %21169, %21170) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21171, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21172 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21173 = "torch.prims.convert_element_type"(%21171, %21172) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21173, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21174 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21175 = "torch.aten.unsqueeze"(%17343, %21174) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%21176 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21177 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%21178 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21179 = "torch.prim.ListConstruct"(%21176, %21177, %21178) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21180 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21181 = "torch.aten.expand"(%21175, %21179, %21180) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%21182 = "torch_c.to_builtin_tensor"(%21173) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21183 = "torch_c.to_builtin_tensor"(%21181) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%21184 = "util.call"(%21182, %21183) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%21185 = "torch_c.from_builtin_tensor"(%21184) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%21185, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%21186 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21187 = "torch.prims.convert_element_type"(%21185, %21186) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21187, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21188 = "torch.aten.mul.Tensor"(%21167, %21187) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21188, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21189 = "torch.aten.div.Tensor"(%21188, %17345) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21189, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21190 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21191 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21192 = "torch.aten.clamp"(%21189, %21190, %21191) : (!torch.vtensor<[4,?,14336],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21192, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21193 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21194 = "torch.prims.convert_element_type"(%21192, %21193) : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> !torch.vtensor<[4,?,14336],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21194, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>, !torch.int) -> ()
%21195 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21196 = "torch.aten.unsqueeze"(%17347, %21195) : (!torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,14336],f8E4M3FNUZ>
%21197 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21198 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21199 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%21200 = "torch.prim.ListConstruct"(%21197, %21198, %21199) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21201 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21202 = "torch.aten.expand"(%21196, %21200, %21201) : (!torch.vtensor<[1,4096,14336],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,14336],f8E4M3FNUZ>
%21203 = "torch_c.to_builtin_tensor"(%21194) : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>) -> tensor<4x?x14336xf8E4M3FNUZ>
%21204 = "torch_c.to_builtin_tensor"(%21202) : (!torch.vtensor<[4,4096,14336],f8E4M3FNUZ>) -> tensor<4x4096x14336xf8E4M3FNUZ>
%21205 = "util.call"(%21203, %21204) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx14336xf8E4M3FNUZ_R4x4096x14336xf8E4M3FNUZ}> : (tensor<4x?x14336xf8E4M3FNUZ>, tensor<4x4096x14336xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%21206 = "torch_c.from_builtin_tensor"(%21205) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21206, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21207 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21208 = "torch.prims.convert_element_type"(%21206, %21207) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21208, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21209 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21210 = "torch.aten.add.Tensor"(%21127, %21208, %21209) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21210, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21211 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21212 = "torch.prims.convert_element_type"(%21210, %21211) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21212, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21213 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21214 = "torch.aten.pow.Tensor_Scalar"(%21212, %21213) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21214, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21215 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%21216 = "torch.prim.ListConstruct"(%21215) : (!torch.int) -> !torch.list<int>
%21217 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%21218 = "torch.constant.none"() : () -> !torch.none
%21219 = "torch.aten.mean.dim"(%21214, %21216, %21217, %21218) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21219, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21220 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%21221 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21222 = "torch.aten.add.Scalar"(%21219, %21220, %21221) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21222, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21223 = "torch.aten.rsqrt"(%21222) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21223, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21224 = "torch.aten.mul.Tensor"(%21212, %21223) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21224, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21225 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21226 = "torch.prims.convert_element_type"(%21224, %21225) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21226, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21227 = "torch.aten.mul.Tensor"(%17349, %21226) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21227, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21228 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21229 = "torch.prims.convert_element_type"(%21227, %21228) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21229, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21230 = "torch.aten.div.Tensor"(%21229, %17351) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21230, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21231 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21232 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21233 = "torch.aten.clamp"(%21230, %21231, %21232) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21233, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21234 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21235 = "torch.prims.convert_element_type"(%21233, %21234) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21235, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21236 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21237 = "torch.aten.unsqueeze"(%17353, %21236) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%21238 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21239 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21240 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21241 = "torch.prim.ListConstruct"(%21238, %21239, %21240) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21242 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21243 = "torch.aten.expand"(%21237, %21241, %21242) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%21244 = "torch_c.to_builtin_tensor"(%21235) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21245 = "torch_c.to_builtin_tensor"(%21243) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%21246 = "util.call"(%21244, %21245) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%21247 = "torch_c.from_builtin_tensor"(%21246) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21247, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21248 = "torch.aten.div.Tensor"(%21247, %17355) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21248, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21249 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21250 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21251 = "torch.aten.clamp"(%21248, %21249, %21250) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21251, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21252 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21253 = "torch.prims.convert_element_type"(%21251, %21252) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21253, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21254 = "torch.aten.div.Tensor"(%21229, %17357) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21254, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21255 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21256 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21257 = "torch.aten.clamp"(%21254, %21255, %21256) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21257, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21258 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21259 = "torch.prims.convert_element_type"(%21257, %21258) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21259, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21260 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21261 = "torch.aten.unsqueeze"(%17359, %21260) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%21262 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21263 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%21264 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21265 = "torch.prim.ListConstruct"(%21262, %21263, %21264) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21266 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21267 = "torch.aten.expand"(%21261, %21265, %21266) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%21268 = "torch_c.to_builtin_tensor"(%21259) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21269 = "torch_c.to_builtin_tensor"(%21267) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%21270 = "util.call"(%21268, %21269) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%21271 = "torch_c.from_builtin_tensor"(%21270) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21271, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21272 = "torch.aten.div.Tensor"(%21271, %17361) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21272, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21273 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21274 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21275 = "torch.aten.clamp"(%21272, %21273, %21274) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21275, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21276 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21277 = "torch.prims.convert_element_type"(%21275, %21276) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21277, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%21278 = "torch.aten.div.Tensor"(%21229, %17363) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21278, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21279 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21280 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21281 = "torch.aten.clamp"(%21278, %21279, %21280) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21281, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21282 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21283 = "torch.prims.convert_element_type"(%21281, %21282) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21283, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21284 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21285 = "torch.aten.unsqueeze"(%17365, %21284) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%21286 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21287 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%21288 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21289 = "torch.prim.ListConstruct"(%21286, %21287, %21288) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21290 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21291 = "torch.aten.expand"(%21285, %21289, %21290) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%21292 = "torch_c.to_builtin_tensor"(%21283) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21293 = "torch_c.to_builtin_tensor"(%21291) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%21294 = "util.call"(%21292, %21293) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%21295 = "torch_c.from_builtin_tensor"(%21294) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21295, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21296 = "torch.aten.div.Tensor"(%21295, %17367) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21296, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21297 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21298 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21299 = "torch.aten.clamp"(%21296, %21297, %21298) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21299, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21300 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21301 = "torch.prims.convert_element_type"(%21299, %21300) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21301, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%21302 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21303 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21304 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21305 = "torch.prim.ListConstruct"(%21302, %18481, %21303, %21304) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21306 = "torch.aten.view"(%21253, %21305) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21306, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%21307 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21308 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21309 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21310 = "torch.prim.ListConstruct"(%21307, %18481, %21308, %21309) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21311 = "torch.aten.view"(%21277, %21310) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21311, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21312 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21313 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21314 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21315 = "torch.prim.ListConstruct"(%21312, %18481, %21313, %21314) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21316 = "torch.aten.view"(%21301, %21315) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21316, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21317 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%21318 = "torch.constant.none"() : () -> !torch.none
%21319 = "torch.constant.none"() : () -> !torch.none
%21320 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%21321 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21322 = "torch.aten.arange"(%21317, %21318, %21319, %21320, %21321) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%21323 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21324 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21325 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21326 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21327 = "torch.constant.none"() : () -> !torch.none
%21328 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%21329 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21330 = "torch.aten.arange.start_step"(%21323, %21324, %21325, %21326, %21327, %21328, %21329) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%21331 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21332 = "torch.prims.convert_element_type"(%21330, %21331) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%21333 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21334 = "torch.aten.div.Scalar"(%21332, %21333) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21335 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%21336 = "torch.aten.pow.Scalar"(%21335, %21334) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21337 = "torch.aten.reciprocal"(%21336) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21338 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%21339 = "torch.aten.mul.Scalar"(%21337, %21338) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%21340 = "torch.aten.reciprocal"(%21339) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21341 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%21342 = "torch.aten.mul.Scalar"(%21340, %21341) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%21343 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%21344 = "torch.aten.gt.Scalar"(%21342, %21343) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%21345 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21346 = "torch.aten.div.Scalar"(%21339, %21345) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21347 = "torch.aten.where.self"(%21344, %21346, %21339) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21348 = "torch.aten.reciprocal"(%21342) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21349 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%21350 = "torch.aten.mul.Scalar"(%21348, %21349) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21351 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21352 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21353 = "torch.aten.sub.Scalar"(%21350, %21351, %21352) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%21354 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%21355 = "torch.aten.div.Scalar"(%21353, %21354) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21356 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21357 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21358 = "torch.aten.rsub.Scalar"(%21355, %21356, %21357) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%21359 = "torch.aten.mul.Tensor"(%21358, %21347) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21360 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21361 = "torch.aten.div.Scalar"(%21359, %21360) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21362 = "torch.aten.mul.Tensor"(%21355, %21347) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21363 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21364 = "torch.aten.add.Tensor"(%21361, %21362, %21363) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21365 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%21366 = "torch.aten.lt.Scalar"(%21342, %21365) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%21367 = "torch.aten.bitwise_not"(%21366) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%21368 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%21369 = "torch.aten.gt.Scalar"(%21342, %21368) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%21370 = "torch.aten.bitwise_not"(%21369) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%21371 = "torch.aten.mul.Tensor"(%21367, %21370) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%21372 = "torch.aten.where.self"(%21371, %21364, %21347) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21373 = "torch.prim.ListConstruct"(%21372, %21372) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%21374 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%21375 = "torch.aten.cat"(%21373, %21374) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%21376 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21377 = "torch.prims.convert_element_type"(%21322, %21376) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%21378 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21379 = "torch.prims.convert_element_type"(%21375, %21378) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%21380 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%21381 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21382 = "torch.prim.ListConstruct"(%21380, %21381) : (!torch.int, !torch.int) -> !torch.list<int>
%21383 = "torch.aten.view"(%21377, %21382) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%21384 = "torch.aten.mul.Tensor"(%21383, %21379) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%21385 = "torch.aten.cos"(%21384) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%21386 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21387 = "torch.prims.convert_element_type"(%21385, %21386) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%21388 = "torch.aten.sin"(%21384) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%21389 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21390 = "torch.prims.convert_element_type"(%21388, %21389) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%21391 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21392 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21393 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21394 = "torch.aten.slice.Tensor"(%21387, %21391, %21392, %18481, %21393) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%21394, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%21395 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21396 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21397 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21398 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21399 = "torch.aten.slice.Tensor"(%21394, %21395, %21396, %21397, %21398) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%21399, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%21400 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21401 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21402 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21403 = "torch.aten.slice.Tensor"(%21390, %21400, %21401, %18481, %21402) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%21403, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%21404 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21405 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21406 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21407 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21408 = "torch.aten.slice.Tensor"(%21403, %21404, %21405, %21406, %21407) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%21408, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%21409 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21410 = "torch.aten.unsqueeze"(%21399, %21409) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%21410, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%21411 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21412 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21413 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21414 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21415 = "torch.aten.slice.Tensor"(%21410, %21411, %21412, %21413, %21414) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%21415, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%21416 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21417 = "torch.aten.unsqueeze"(%21415, %21416) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21417, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%21418 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%21419 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21420 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21421 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21422 = "torch.aten.slice.Tensor"(%21417, %21418, %21419, %21420, %21421) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21422, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%21423 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21424 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21425 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21426 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21427 = "torch.prim.ListConstruct"(%21423, %21424, %21425, %21426) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21428 = "torch.aten.repeat"(%21422, %21427) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21428, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%21429 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21430 = "torch.aten.unsqueeze"(%21408, %21429) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%21430, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%21431 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21432 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21433 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21434 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21435 = "torch.aten.slice.Tensor"(%21430, %21431, %21432, %21433, %21434) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%21435, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%21436 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21437 = "torch.aten.unsqueeze"(%21435, %21436) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21437, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%21438 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%21439 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21440 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21441 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21442 = "torch.aten.slice.Tensor"(%21437, %21438, %21439, %21440, %21441) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21442, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%21443 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21444 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21445 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21446 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21447 = "torch.prim.ListConstruct"(%21443, %21444, %21445, %21446) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21448 = "torch.aten.repeat"(%21442, %21447) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21448, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%21449 = "torch.aten.mul.Tensor"(%21306, %21428) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21449, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%21450 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%21451 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21452 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%21453 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21454 = "torch.aten.slice.Tensor"(%21306, %21450, %21451, %21452, %21453) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21454, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%21455 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%21456 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%21457 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21458 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21459 = "torch.aten.slice.Tensor"(%21306, %21455, %21456, %21457, %21458) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21459, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%21460 = "torch.aten.neg"(%21459) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21460, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%21461 = "torch.prim.ListConstruct"(%21460, %21454) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%21462 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%21463 = "torch.aten.cat"(%21461, %21462) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21463, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%21464 = "torch.aten.mul.Tensor"(%21463, %21448) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21464, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%21465 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21466 = "torch.aten.add.Tensor"(%21449, %21464, %21465) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21466, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%21467 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%21468 = "torch.constant.none"() : () -> !torch.none
%21469 = "torch.constant.none"() : () -> !torch.none
%21470 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%21471 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21472 = "torch.aten.arange"(%21467, %21468, %21469, %21470, %21471) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%21473 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21474 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21475 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21476 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21477 = "torch.constant.none"() : () -> !torch.none
%21478 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%21479 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21480 = "torch.aten.arange.start_step"(%21473, %21474, %21475, %21476, %21477, %21478, %21479) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%21481 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21482 = "torch.prims.convert_element_type"(%21480, %21481) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%21483 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21484 = "torch.aten.div.Scalar"(%21482, %21483) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21485 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%21486 = "torch.aten.pow.Scalar"(%21485, %21484) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21487 = "torch.aten.reciprocal"(%21486) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21488 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%21489 = "torch.aten.mul.Scalar"(%21487, %21488) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%21490 = "torch.aten.reciprocal"(%21489) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21491 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%21492 = "torch.aten.mul.Scalar"(%21490, %21491) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%21493 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%21494 = "torch.aten.gt.Scalar"(%21492, %21493) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%21495 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21496 = "torch.aten.div.Scalar"(%21489, %21495) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21497 = "torch.aten.where.self"(%21494, %21496, %21489) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21498 = "torch.aten.reciprocal"(%21492) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21499 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%21500 = "torch.aten.mul.Scalar"(%21498, %21499) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21501 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21502 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21503 = "torch.aten.sub.Scalar"(%21500, %21501, %21502) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%21504 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%21505 = "torch.aten.div.Scalar"(%21503, %21504) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21506 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21507 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21508 = "torch.aten.rsub.Scalar"(%21505, %21506, %21507) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%21509 = "torch.aten.mul.Tensor"(%21508, %21497) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21510 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21511 = "torch.aten.div.Scalar"(%21509, %21510) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21512 = "torch.aten.mul.Tensor"(%21505, %21497) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21513 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21514 = "torch.aten.add.Tensor"(%21511, %21512, %21513) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21515 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%21516 = "torch.aten.lt.Scalar"(%21492, %21515) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%21517 = "torch.aten.bitwise_not"(%21516) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%21518 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%21519 = "torch.aten.gt.Scalar"(%21492, %21518) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%21520 = "torch.aten.bitwise_not"(%21519) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%21521 = "torch.aten.mul.Tensor"(%21517, %21520) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%21522 = "torch.aten.where.self"(%21521, %21514, %21497) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21523 = "torch.prim.ListConstruct"(%21522, %21522) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%21524 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%21525 = "torch.aten.cat"(%21523, %21524) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%21526 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21527 = "torch.prims.convert_element_type"(%21472, %21526) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%21528 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21529 = "torch.prims.convert_element_type"(%21525, %21528) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%21530 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%21531 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21532 = "torch.prim.ListConstruct"(%21530, %21531) : (!torch.int, !torch.int) -> !torch.list<int>
%21533 = "torch.aten.view"(%21527, %21532) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%21534 = "torch.aten.mul.Tensor"(%21533, %21529) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%21535 = "torch.aten.cos"(%21534) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%21536 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21537 = "torch.prims.convert_element_type"(%21535, %21536) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%21538 = "torch.aten.sin"(%21534) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%21539 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21540 = "torch.prims.convert_element_type"(%21538, %21539) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%21541 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21542 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21543 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21544 = "torch.aten.slice.Tensor"(%21537, %21541, %21542, %18481, %21543) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%21544, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%21545 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21546 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21547 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21548 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21549 = "torch.aten.slice.Tensor"(%21544, %21545, %21546, %21547, %21548) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%21549, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%21550 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21551 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21552 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21553 = "torch.aten.slice.Tensor"(%21540, %21550, %21551, %18481, %21552) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%21553, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%21554 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21555 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21556 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21557 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21558 = "torch.aten.slice.Tensor"(%21553, %21554, %21555, %21556, %21557) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%21558, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%21559 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21560 = "torch.aten.unsqueeze"(%21549, %21559) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%21560, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%21561 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21562 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21563 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21564 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21565 = "torch.aten.slice.Tensor"(%21560, %21561, %21562, %21563, %21564) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%21565, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%21566 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21567 = "torch.aten.unsqueeze"(%21565, %21566) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21567, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%21568 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%21569 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21570 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21571 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21572 = "torch.aten.slice.Tensor"(%21567, %21568, %21569, %21570, %21571) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21572, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%21573 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21574 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21575 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21576 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21577 = "torch.prim.ListConstruct"(%21573, %21574, %21575, %21576) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21578 = "torch.aten.repeat"(%21572, %21577) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21578, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%21579 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21580 = "torch.aten.unsqueeze"(%21558, %21579) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%21580, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%21581 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21582 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21583 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21584 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21585 = "torch.aten.slice.Tensor"(%21580, %21581, %21582, %21583, %21584) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%21585, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%21586 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21587 = "torch.aten.unsqueeze"(%21585, %21586) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21587, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%21588 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%21589 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21590 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21591 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21592 = "torch.aten.slice.Tensor"(%21587, %21588, %21589, %21590, %21591) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21592, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%21593 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21594 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21595 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21596 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21597 = "torch.prim.ListConstruct"(%21593, %21594, %21595, %21596) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21598 = "torch.aten.repeat"(%21592, %21597) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%21598, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%21599 = "torch.aten.mul.Tensor"(%21311, %21578) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21599, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21600 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%21601 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21602 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%21603 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21604 = "torch.aten.slice.Tensor"(%21311, %21600, %21601, %21602, %21603) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21604, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%21605 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%21606 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%21607 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%21608 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21609 = "torch.aten.slice.Tensor"(%21311, %21605, %21606, %21607, %21608) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21609, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%21610 = "torch.aten.neg"(%21609) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21610, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%21611 = "torch.prim.ListConstruct"(%21610, %21604) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%21612 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%21613 = "torch.aten.cat"(%21611, %21612) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21613, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21614 = "torch.aten.mul.Tensor"(%21613, %21598) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21614, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21615 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21616 = "torch.aten.add.Tensor"(%21599, %21614, %21615) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21616, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21617 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%21618 = "torch.aten.mul.Scalar"(%arg69, %21617) : (!torch.vtensor<[4,?],si64>, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%21618, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%21619 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21620 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21621 = "torch.aten.add.Scalar"(%21618, %21619, %21620) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%21621, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%21622 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21623 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21624 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21625 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21626 = "torch.prim.ListConstruct"(%21622, %18477, %21623, %21624, %21625) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21627 = "torch.aten.view"(%21616, %21626) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21627, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21628 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21629 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21630 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21631 = "torch.prim.ListConstruct"(%19011, %21628, %21629, %21630) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21632 = "torch.aten.view"(%21627, %21631) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21632, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21633 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%21634 = "torch.aten.view"(%21621, %21633) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%21634, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%21635 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21636 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21637 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21638 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21639 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21640 = "torch.prim.ListConstruct"(%18479, %21635, %21636, %21637, %21638, %21639) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21641 = "torch.aten.view"(%21043, %21640) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21641, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21642 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21643 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21644 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21645 = "torch.prim.ListConstruct"(%18993, %21642, %21643, %21644) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21646 = "torch.aten.view"(%21641, %21645) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21646, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21647 = "torch.prim.ListConstruct"(%21634) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%21648 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21649 = "torch.aten.index_put"(%21646, %21647, %21632, %21648) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21649, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21650 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21651 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21652 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21653 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21654 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21655 = "torch.prim.ListConstruct"(%18479, %21650, %21651, %21652, %21653, %21654) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21656 = "torch.aten.view"(%21649, %21655) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21656, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21657 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%21658 = "torch.prim.ListConstruct"(%18479, %21657) : (!torch.int, !torch.int) -> !torch.list<int>
%21659 = "torch.aten.view"(%21656, %21658) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21659, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%21660 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21661 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21662 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21663 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21664 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21665 = "torch.prim.ListConstruct"(%18479, %21660, %21661, %21662, %21663, %21664) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21666 = "torch.aten.view"(%21659, %21665) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21666, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21667 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21668 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21669 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21670 = "torch.prim.ListConstruct"(%18993, %21667, %21668, %21669) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21671 = "torch.aten.view"(%21666, %21670) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21671, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21672 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21673 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21674 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21675 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21676 = "torch.prim.ListConstruct"(%21672, %18477, %21673, %21674, %21675) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21677 = "torch.aten.view"(%21316, %21676) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21677, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21678 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21679 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21680 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21681 = "torch.prim.ListConstruct"(%19011, %21678, %21679, %21680) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21682 = "torch.aten.view"(%21677, %21681) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21682, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21683 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21684 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21685 = "torch.aten.add.Scalar"(%21621, %21683, %21684) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%21685, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%21686 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%21687 = "torch.aten.view"(%21685, %21686) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%21687, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%21688 = "torch.prim.ListConstruct"(%21687) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%21689 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21690 = "torch.aten.index_put"(%21671, %21688, %21682, %21689) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21690, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21691 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21692 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21693 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21694 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21695 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21696 = "torch.prim.ListConstruct"(%18479, %21691, %21692, %21693, %21694, %21695) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21697 = "torch.aten.view"(%21690, %21696) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21697, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21698 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%21699 = "torch.prim.ListConstruct"(%18479, %21698) : (!torch.int, !torch.int) -> !torch.list<int>
%21700 = "torch.aten.view"(%21697, %21699) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21700, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%21701 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%21702 = "torch.aten.unsqueeze"(%21616, %21701) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21702, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%21703 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21704 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21705 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21706 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21707 = "torch.prim.ListConstruct"(%21703, %18481, %21704, %21705, %21706) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21708 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21709 = "torch.aten.expand"(%21702, %21707, %21708) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21709, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%21710 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21711 = "torch.aten.clone"(%21709, %21710) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21711, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%21712 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21713 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21714 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21715 = "torch.prim.ListConstruct"(%21712, %18481, %21713, %21714) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21716 = "torch.aten._unsafe_view"(%21711, %21715) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21716, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%21717 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%21718 = "torch.aten.unsqueeze"(%21316, %21717) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21718, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%21719 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21720 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21721 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21722 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21723 = "torch.prim.ListConstruct"(%21719, %18481, %21720, %21721, %21722) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21724 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21725 = "torch.aten.expand"(%21718, %21723, %21724) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21725, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%21726 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21727 = "torch.aten.clone"(%21725, %21726) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21727, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%21728 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21729 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21730 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21731 = "torch.prim.ListConstruct"(%21728, %18481, %21729, %21730) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21732 = "torch.aten._unsafe_view"(%21727, %21731) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21732, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%21733 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21734 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21735 = "torch.aten.transpose.int"(%21466, %21733, %21734) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21735, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%21736 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21737 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21738 = "torch.aten.transpose.int"(%21716, %21736, %21737) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21738, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%21739 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21740 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21741 = "torch.aten.transpose.int"(%21732, %21739, %21740) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21741, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%21742 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21743 = "torch.aten.squeeze.dim"(%18570, %21742) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21743, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%21744 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21745 = "torch.aten.squeeze.dim"(%21743, %21744) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21745, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%21746 = "torch_c.to_builtin_tensor"(%21735) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%21747 = "torch_c.to_builtin_tensor"(%21738) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%21748 = "torch_c.to_builtin_tensor"(%21741) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%21749 = "torch_c.to_builtin_tensor"(%21745) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>) -> tensor<4x1x?x?xf8E4M3FNUZ>
%21750 = "tensor.cast"(%21749) : (tensor<4x1x?x?xf8E4M3FNUZ>) -> tensor<?x?x?x?xf8E4M3FNUZ>
%21751 = "torch_c.to_builtin_tensor"(%17369) : (!torch.vtensor<[],f32>) -> tensor<f32>
%21752 = "util.call"(%21746, %21747, %21748, %21751, %21750) <{callee = @sharktank_masked_flash_attention_4_32_128_128_f8E4M3FNUZ_f32_f32}> : (tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<f32>, tensor<?x?x?x?xf8E4M3FNUZ>) -> tensor<4x32x?x128xf32>
%21753 = "torch_c.from_builtin_tensor"(%21752) : (tensor<4x32x?x128xf32>) -> !torch.vtensor<[4,32,?,128],f32>
"torch.bind_symbolic_shape"(%21753, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f32>, !torch.int) -> ()
%21754 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21755 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21756 = "torch.aten.transpose.int"(%21753, %21754, %21755) : (!torch.vtensor<[4,32,?,128],f32>, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%21756, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%21757 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21758 = "torch.aten.clone"(%21756, %21757) : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%21758, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%21759 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21760 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21761 = "torch.prim.ListConstruct"(%21759, %18481, %21760) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21762 = "torch.aten._unsafe_view"(%21758, %21761) : (!torch.vtensor<[4,?,32,128],f32>, !torch.list<int>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21762, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21763 = "torch.aten.div.Tensor"(%21762, %17371) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21763, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21764 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21765 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21766 = "torch.aten.clamp"(%21763, %21764, %21765) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21766, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21767 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21768 = "torch.prims.convert_element_type"(%21766, %21767) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21768, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21769 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21770 = "torch.aten.unsqueeze"(%17373, %21769) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%21771 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21772 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21773 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21774 = "torch.prim.ListConstruct"(%21771, %21772, %21773) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21775 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21776 = "torch.aten.expand"(%21770, %21774, %21775) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%21777 = "torch_c.to_builtin_tensor"(%21768) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21778 = "torch_c.to_builtin_tensor"(%21776) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%21779 = "util.call"(%21777, %21778) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%21780 = "torch_c.from_builtin_tensor"(%21779) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21780, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21781 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21782 = "torch.prims.convert_element_type"(%21780, %21781) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21782, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21783 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21784 = "torch.aten.add.Tensor"(%21210, %21782, %21783) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21784, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21785 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21786 = "torch.prims.convert_element_type"(%21784, %21785) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21786, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21787 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21788 = "torch.aten.pow.Tensor_Scalar"(%21786, %21787) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21788, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21789 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%21790 = "torch.prim.ListConstruct"(%21789) : (!torch.int) -> !torch.list<int>
%21791 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%21792 = "torch.constant.none"() : () -> !torch.none
%21793 = "torch.aten.mean.dim"(%21788, %21790, %21791, %21792) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21793, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21794 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%21795 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21796 = "torch.aten.add.Scalar"(%21793, %21794, %21795) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21796, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21797 = "torch.aten.rsqrt"(%21796) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21797, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21798 = "torch.aten.mul.Tensor"(%21786, %21797) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21798, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21799 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21800 = "torch.prims.convert_element_type"(%21798, %21799) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21800, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21801 = "torch.aten.mul.Tensor"(%17375, %21800) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21801, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21802 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21803 = "torch.prims.convert_element_type"(%21801, %21802) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21803, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21804 = "torch.aten.div.Tensor"(%21803, %17377) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21804, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21805 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21806 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21807 = "torch.aten.clamp"(%21804, %21805, %21806) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21807, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21808 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21809 = "torch.prims.convert_element_type"(%21807, %21808) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21809, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21810 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21811 = "torch.aten.unsqueeze"(%17379, %21810) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%21812 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21813 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%21814 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21815 = "torch.prim.ListConstruct"(%21812, %21813, %21814) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21816 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21817 = "torch.aten.expand"(%21811, %21815, %21816) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%21818 = "torch_c.to_builtin_tensor"(%21809) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21819 = "torch_c.to_builtin_tensor"(%21817) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%21820 = "util.call"(%21818, %21819) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%21821 = "torch_c.from_builtin_tensor"(%21820) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%21821, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%21822 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21823 = "torch.prims.convert_element_type"(%21821, %21822) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21823, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21824 = "torch.aten.silu"(%21823) : (!torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21824, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21825 = "torch.aten.div.Tensor"(%21803, %17381) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21825, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21826 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21827 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21828 = "torch.aten.clamp"(%21825, %21826, %21827) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21828, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21829 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21830 = "torch.prims.convert_element_type"(%21828, %21829) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21830, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21831 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21832 = "torch.aten.unsqueeze"(%17383, %21831) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%21833 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21834 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%21835 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21836 = "torch.prim.ListConstruct"(%21833, %21834, %21835) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21837 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21838 = "torch.aten.expand"(%21832, %21836, %21837) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%21839 = "torch_c.to_builtin_tensor"(%21830) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21840 = "torch_c.to_builtin_tensor"(%21838) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%21841 = "util.call"(%21839, %21840) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%21842 = "torch_c.from_builtin_tensor"(%21841) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%21842, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%21843 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21844 = "torch.prims.convert_element_type"(%21842, %21843) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21844, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21845 = "torch.aten.mul.Tensor"(%21824, %21844) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21845, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21846 = "torch.aten.div.Tensor"(%21845, %17385) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21846, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21847 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21848 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21849 = "torch.aten.clamp"(%21846, %21847, %21848) : (!torch.vtensor<[4,?,14336],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%21849, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%21850 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21851 = "torch.prims.convert_element_type"(%21849, %21850) : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> !torch.vtensor<[4,?,14336],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21851, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>, !torch.int) -> ()
%21852 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21853 = "torch.aten.unsqueeze"(%17387, %21852) : (!torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,14336],f8E4M3FNUZ>
%21854 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21855 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21856 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%21857 = "torch.prim.ListConstruct"(%21854, %21855, %21856) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21858 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21859 = "torch.aten.expand"(%21853, %21857, %21858) : (!torch.vtensor<[1,4096,14336],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,14336],f8E4M3FNUZ>
%21860 = "torch_c.to_builtin_tensor"(%21851) : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>) -> tensor<4x?x14336xf8E4M3FNUZ>
%21861 = "torch_c.to_builtin_tensor"(%21859) : (!torch.vtensor<[4,4096,14336],f8E4M3FNUZ>) -> tensor<4x4096x14336xf8E4M3FNUZ>
%21862 = "util.call"(%21860, %21861) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx14336xf8E4M3FNUZ_R4x4096x14336xf8E4M3FNUZ}> : (tensor<4x?x14336xf8E4M3FNUZ>, tensor<4x4096x14336xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%21863 = "torch_c.from_builtin_tensor"(%21862) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21863, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21864 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21865 = "torch.prims.convert_element_type"(%21863, %21864) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21865, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21866 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21867 = "torch.aten.add.Tensor"(%21784, %21865, %21866) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21867, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21868 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21869 = "torch.prims.convert_element_type"(%21867, %21868) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21869, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21870 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21871 = "torch.aten.pow.Tensor_Scalar"(%21869, %21870) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21871, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21872 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%21873 = "torch.prim.ListConstruct"(%21872) : (!torch.int) -> !torch.list<int>
%21874 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%21875 = "torch.constant.none"() : () -> !torch.none
%21876 = "torch.aten.mean.dim"(%21871, %21873, %21874, %21875) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21876, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21877 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%21878 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%21879 = "torch.aten.add.Scalar"(%21876, %21877, %21878) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21879, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21880 = "torch.aten.rsqrt"(%21879) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%21880, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%21881 = "torch.aten.mul.Tensor"(%21869, %21880) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21881, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21882 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21883 = "torch.prims.convert_element_type"(%21881, %21882) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21883, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21884 = "torch.aten.mul.Tensor"(%17389, %21883) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21884, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21885 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%21886 = "torch.prims.convert_element_type"(%21884, %21885) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21886, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21887 = "torch.aten.div.Tensor"(%21886, %17391) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21887, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21888 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21889 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21890 = "torch.aten.clamp"(%21887, %21888, %21889) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21890, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21891 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21892 = "torch.prims.convert_element_type"(%21890, %21891) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21892, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21893 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21894 = "torch.aten.unsqueeze"(%17393, %21893) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%21895 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21896 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21897 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21898 = "torch.prim.ListConstruct"(%21895, %21896, %21897) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21899 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21900 = "torch.aten.expand"(%21894, %21898, %21899) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%21901 = "torch_c.to_builtin_tensor"(%21892) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21902 = "torch_c.to_builtin_tensor"(%21900) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%21903 = "util.call"(%21901, %21902) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%21904 = "torch_c.from_builtin_tensor"(%21903) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21904, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21905 = "torch.aten.div.Tensor"(%21904, %17395) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21905, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21906 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21907 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21908 = "torch.aten.clamp"(%21905, %21906, %21907) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%21908, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%21909 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21910 = "torch.prims.convert_element_type"(%21908, %21909) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21910, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21911 = "torch.aten.div.Tensor"(%21886, %17397) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21911, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21912 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21913 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21914 = "torch.aten.clamp"(%21911, %21912, %21913) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21914, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21915 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21916 = "torch.prims.convert_element_type"(%21914, %21915) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21916, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21917 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21918 = "torch.aten.unsqueeze"(%17399, %21917) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%21919 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21920 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%21921 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21922 = "torch.prim.ListConstruct"(%21919, %21920, %21921) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21923 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21924 = "torch.aten.expand"(%21918, %21922, %21923) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%21925 = "torch_c.to_builtin_tensor"(%21916) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21926 = "torch_c.to_builtin_tensor"(%21924) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%21927 = "util.call"(%21925, %21926) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%21928 = "torch_c.from_builtin_tensor"(%21927) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21928, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21929 = "torch.aten.div.Tensor"(%21928, %17401) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21929, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21930 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21931 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21932 = "torch.aten.clamp"(%21929, %21930, %21931) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21932, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21933 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21934 = "torch.prims.convert_element_type"(%21932, %21933) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21934, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%21935 = "torch.aten.div.Tensor"(%21886, %17403) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21935, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21936 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21937 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21938 = "torch.aten.clamp"(%21935, %21936, %21937) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%21938, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%21939 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21940 = "torch.prims.convert_element_type"(%21938, %21939) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21940, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%21941 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21942 = "torch.aten.unsqueeze"(%17405, %21941) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%21943 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21944 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%21945 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%21946 = "torch.prim.ListConstruct"(%21943, %21944, %21945) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21947 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21948 = "torch.aten.expand"(%21942, %21946, %21947) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%21949 = "torch_c.to_builtin_tensor"(%21940) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%21950 = "torch_c.to_builtin_tensor"(%21948) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%21951 = "util.call"(%21949, %21950) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%21952 = "torch_c.from_builtin_tensor"(%21951) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21952, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21953 = "torch.aten.div.Tensor"(%21952, %17407) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21953, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21954 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%21955 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%21956 = "torch.aten.clamp"(%21953, %21954, %21955) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%21956, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%21957 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%21958 = "torch.prims.convert_element_type"(%21956, %21957) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21958, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%21959 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21960 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%21961 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21962 = "torch.prim.ListConstruct"(%21959, %18481, %21960, %21961) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21963 = "torch.aten.view"(%21910, %21962) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21963, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%21964 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21965 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21966 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21967 = "torch.prim.ListConstruct"(%21964, %18481, %21965, %21966) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21968 = "torch.aten.view"(%21934, %21967) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21968, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21969 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21970 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%21971 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21972 = "torch.prim.ListConstruct"(%21969, %18481, %21970, %21971) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%21973 = "torch.aten.view"(%21958, %21972) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%21973, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%21974 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%21975 = "torch.constant.none"() : () -> !torch.none
%21976 = "torch.constant.none"() : () -> !torch.none
%21977 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%21978 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21979 = "torch.aten.arange"(%21974, %21975, %21976, %21977, %21978) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%21980 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%21981 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21982 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%21983 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%21984 = "torch.constant.none"() : () -> !torch.none
%21985 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%21986 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%21987 = "torch.aten.arange.start_step"(%21980, %21981, %21982, %21983, %21984, %21985, %21986) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%21988 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%21989 = "torch.prims.convert_element_type"(%21987, %21988) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%21990 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%21991 = "torch.aten.div.Scalar"(%21989, %21990) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%21992 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%21993 = "torch.aten.pow.Scalar"(%21992, %21991) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21994 = "torch.aten.reciprocal"(%21993) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21995 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%21996 = "torch.aten.mul.Scalar"(%21994, %21995) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%21997 = "torch.aten.reciprocal"(%21996) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%21998 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%21999 = "torch.aten.mul.Scalar"(%21997, %21998) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%22000 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%22001 = "torch.aten.gt.Scalar"(%21999, %22000) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22002 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22003 = "torch.aten.div.Scalar"(%21996, %22002) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22004 = "torch.aten.where.self"(%22001, %22003, %21996) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22005 = "torch.aten.reciprocal"(%21999) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22006 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%22007 = "torch.aten.mul.Scalar"(%22005, %22006) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22008 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22009 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22010 = "torch.aten.sub.Scalar"(%22007, %22008, %22009) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%22011 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22012 = "torch.aten.div.Scalar"(%22010, %22011) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22013 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22014 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22015 = "torch.aten.rsub.Scalar"(%22012, %22013, %22014) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%22016 = "torch.aten.mul.Tensor"(%22015, %22004) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22017 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22018 = "torch.aten.div.Scalar"(%22016, %22017) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22019 = "torch.aten.mul.Tensor"(%22012, %22004) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22020 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22021 = "torch.aten.add.Tensor"(%22018, %22019, %22020) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22022 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%22023 = "torch.aten.lt.Scalar"(%21999, %22022) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22024 = "torch.aten.bitwise_not"(%22023) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22025 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%22026 = "torch.aten.gt.Scalar"(%21999, %22025) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22027 = "torch.aten.bitwise_not"(%22026) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22028 = "torch.aten.mul.Tensor"(%22024, %22027) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22029 = "torch.aten.where.self"(%22028, %22021, %22004) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22030 = "torch.prim.ListConstruct"(%22029, %22029) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%22031 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%22032 = "torch.aten.cat"(%22030, %22031) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%22033 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22034 = "torch.prims.convert_element_type"(%21979, %22033) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%22035 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22036 = "torch.prims.convert_element_type"(%22032, %22035) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%22037 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%22038 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22039 = "torch.prim.ListConstruct"(%22037, %22038) : (!torch.int, !torch.int) -> !torch.list<int>
%22040 = "torch.aten.view"(%22034, %22039) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%22041 = "torch.aten.mul.Tensor"(%22040, %22036) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%22042 = "torch.aten.cos"(%22041) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%22043 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22044 = "torch.prims.convert_element_type"(%22042, %22043) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%22045 = "torch.aten.sin"(%22041) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%22046 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22047 = "torch.prims.convert_element_type"(%22045, %22046) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%22048 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22049 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22050 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22051 = "torch.aten.slice.Tensor"(%22044, %22048, %22049, %18481, %22050) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22051, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22052 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22053 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22054 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22055 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22056 = "torch.aten.slice.Tensor"(%22051, %22052, %22053, %22054, %22055) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22056, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22057 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22058 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22059 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22060 = "torch.aten.slice.Tensor"(%22047, %22057, %22058, %18481, %22059) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22060, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22061 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22062 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22063 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22064 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22065 = "torch.aten.slice.Tensor"(%22060, %22061, %22062, %22063, %22064) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22065, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22066 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22067 = "torch.aten.unsqueeze"(%22056, %22066) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22067, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22068 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22069 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22070 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22071 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22072 = "torch.aten.slice.Tensor"(%22067, %22068, %22069, %22070, %22071) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22072, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22073 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22074 = "torch.aten.unsqueeze"(%22072, %22073) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22074, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22075 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22076 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22077 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22078 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22079 = "torch.aten.slice.Tensor"(%22074, %22075, %22076, %22077, %22078) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22079, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22080 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22081 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22082 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22083 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22084 = "torch.prim.ListConstruct"(%22080, %22081, %22082, %22083) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22085 = "torch.aten.repeat"(%22079, %22084) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22085, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%22086 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22087 = "torch.aten.unsqueeze"(%22065, %22086) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22087, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22088 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22089 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22090 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22091 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22092 = "torch.aten.slice.Tensor"(%22087, %22088, %22089, %22090, %22091) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22092, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22093 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22094 = "torch.aten.unsqueeze"(%22092, %22093) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22094, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22095 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22096 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22097 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22098 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22099 = "torch.aten.slice.Tensor"(%22094, %22095, %22096, %22097, %22098) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22099, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22100 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22101 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22102 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22103 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22104 = "torch.prim.ListConstruct"(%22100, %22101, %22102, %22103) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22105 = "torch.aten.repeat"(%22099, %22104) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22105, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%22106 = "torch.aten.mul.Tensor"(%21963, %22085) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22106, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22107 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22108 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22109 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%22110 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22111 = "torch.aten.slice.Tensor"(%21963, %22107, %22108, %22109, %22110) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22111, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%22112 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22113 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%22114 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22115 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22116 = "torch.aten.slice.Tensor"(%21963, %22112, %22113, %22114, %22115) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22116, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%22117 = "torch.aten.neg"(%22116) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22117, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%22118 = "torch.prim.ListConstruct"(%22117, %22111) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%22119 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%22120 = "torch.aten.cat"(%22118, %22119) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22120, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22121 = "torch.aten.mul.Tensor"(%22120, %22105) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22121, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22122 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22123 = "torch.aten.add.Tensor"(%22106, %22121, %22122) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22123, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22124 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%22125 = "torch.constant.none"() : () -> !torch.none
%22126 = "torch.constant.none"() : () -> !torch.none
%22127 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%22128 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22129 = "torch.aten.arange"(%22124, %22125, %22126, %22127, %22128) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%22130 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22131 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22132 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22133 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22134 = "torch.constant.none"() : () -> !torch.none
%22135 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%22136 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22137 = "torch.aten.arange.start_step"(%22130, %22131, %22132, %22133, %22134, %22135, %22136) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%22138 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22139 = "torch.prims.convert_element_type"(%22137, %22138) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%22140 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22141 = "torch.aten.div.Scalar"(%22139, %22140) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22142 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%22143 = "torch.aten.pow.Scalar"(%22142, %22141) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22144 = "torch.aten.reciprocal"(%22143) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22145 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%22146 = "torch.aten.mul.Scalar"(%22144, %22145) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%22147 = "torch.aten.reciprocal"(%22146) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22148 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%22149 = "torch.aten.mul.Scalar"(%22147, %22148) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%22150 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%22151 = "torch.aten.gt.Scalar"(%22149, %22150) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22152 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22153 = "torch.aten.div.Scalar"(%22146, %22152) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22154 = "torch.aten.where.self"(%22151, %22153, %22146) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22155 = "torch.aten.reciprocal"(%22149) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22156 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%22157 = "torch.aten.mul.Scalar"(%22155, %22156) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22158 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22159 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22160 = "torch.aten.sub.Scalar"(%22157, %22158, %22159) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%22161 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22162 = "torch.aten.div.Scalar"(%22160, %22161) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22163 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22164 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22165 = "torch.aten.rsub.Scalar"(%22162, %22163, %22164) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%22166 = "torch.aten.mul.Tensor"(%22165, %22154) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22167 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22168 = "torch.aten.div.Scalar"(%22166, %22167) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22169 = "torch.aten.mul.Tensor"(%22162, %22154) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22170 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22171 = "torch.aten.add.Tensor"(%22168, %22169, %22170) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22172 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%22173 = "torch.aten.lt.Scalar"(%22149, %22172) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22174 = "torch.aten.bitwise_not"(%22173) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22175 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%22176 = "torch.aten.gt.Scalar"(%22149, %22175) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22177 = "torch.aten.bitwise_not"(%22176) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22178 = "torch.aten.mul.Tensor"(%22174, %22177) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22179 = "torch.aten.where.self"(%22178, %22171, %22154) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22180 = "torch.prim.ListConstruct"(%22179, %22179) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%22181 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%22182 = "torch.aten.cat"(%22180, %22181) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%22183 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22184 = "torch.prims.convert_element_type"(%22129, %22183) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%22185 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22186 = "torch.prims.convert_element_type"(%22182, %22185) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%22187 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%22188 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22189 = "torch.prim.ListConstruct"(%22187, %22188) : (!torch.int, !torch.int) -> !torch.list<int>
%22190 = "torch.aten.view"(%22184, %22189) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%22191 = "torch.aten.mul.Tensor"(%22190, %22186) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%22192 = "torch.aten.cos"(%22191) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%22193 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22194 = "torch.prims.convert_element_type"(%22192, %22193) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%22195 = "torch.aten.sin"(%22191) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%22196 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22197 = "torch.prims.convert_element_type"(%22195, %22196) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%22198 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22199 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22200 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22201 = "torch.aten.slice.Tensor"(%22194, %22198, %22199, %18481, %22200) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22201, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22202 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22203 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22204 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22205 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22206 = "torch.aten.slice.Tensor"(%22201, %22202, %22203, %22204, %22205) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22206, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22207 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22208 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22209 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22210 = "torch.aten.slice.Tensor"(%22197, %22207, %22208, %18481, %22209) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22210, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22211 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22212 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22213 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22214 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22215 = "torch.aten.slice.Tensor"(%22210, %22211, %22212, %22213, %22214) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22215, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22216 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22217 = "torch.aten.unsqueeze"(%22206, %22216) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22217, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22218 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22219 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22220 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22221 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22222 = "torch.aten.slice.Tensor"(%22217, %22218, %22219, %22220, %22221) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22222, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22223 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22224 = "torch.aten.unsqueeze"(%22222, %22223) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22224, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22225 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22226 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22227 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22228 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22229 = "torch.aten.slice.Tensor"(%22224, %22225, %22226, %22227, %22228) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22229, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22230 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22231 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22232 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22233 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22234 = "torch.prim.ListConstruct"(%22230, %22231, %22232, %22233) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22235 = "torch.aten.repeat"(%22229, %22234) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22235, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%22236 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22237 = "torch.aten.unsqueeze"(%22215, %22236) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22237, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22238 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22239 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22240 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22241 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22242 = "torch.aten.slice.Tensor"(%22237, %22238, %22239, %22240, %22241) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22242, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22243 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22244 = "torch.aten.unsqueeze"(%22242, %22243) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22244, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22245 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22246 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22247 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22248 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22249 = "torch.aten.slice.Tensor"(%22244, %22245, %22246, %22247, %22248) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22249, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22250 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22251 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22252 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22253 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22254 = "torch.prim.ListConstruct"(%22250, %22251, %22252, %22253) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22255 = "torch.aten.repeat"(%22249, %22254) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22255, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%22256 = "torch.aten.mul.Tensor"(%21968, %22235) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22256, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22257 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22258 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22259 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%22260 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22261 = "torch.aten.slice.Tensor"(%21968, %22257, %22258, %22259, %22260) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22261, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%22262 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22263 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%22264 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22265 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22266 = "torch.aten.slice.Tensor"(%21968, %22262, %22263, %22264, %22265) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22266, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%22267 = "torch.aten.neg"(%22266) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22267, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%22268 = "torch.prim.ListConstruct"(%22267, %22261) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%22269 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%22270 = "torch.aten.cat"(%22268, %22269) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22270, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22271 = "torch.aten.mul.Tensor"(%22270, %22255) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22271, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22272 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22273 = "torch.aten.add.Tensor"(%22256, %22271, %22272) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22273, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22274 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%22275 = "torch.aten.mul.Scalar"(%arg69, %22274) : (!torch.vtensor<[4,?],si64>, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%22275, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%22276 = "torch.constant.int"() <{value = 10 : i64}> : () -> !torch.int
%22277 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22278 = "torch.aten.add.Scalar"(%22275, %22276, %22277) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%22278, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%22279 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22280 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22281 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22282 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22283 = "torch.prim.ListConstruct"(%22279, %18477, %22280, %22281, %22282) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22284 = "torch.aten.view"(%22273, %22283) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22284, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22285 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22286 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22287 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22288 = "torch.prim.ListConstruct"(%19011, %22285, %22286, %22287) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22289 = "torch.aten.view"(%22284, %22288) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22289, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22290 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%22291 = "torch.aten.view"(%22278, %22290) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%22291, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%22292 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22293 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22294 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22295 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22296 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22297 = "torch.prim.ListConstruct"(%18479, %22292, %22293, %22294, %22295, %22296) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22298 = "torch.aten.view"(%21700, %22297) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22298, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22299 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22300 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22301 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22302 = "torch.prim.ListConstruct"(%18993, %22299, %22300, %22301) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22303 = "torch.aten.view"(%22298, %22302) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22303, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22304 = "torch.prim.ListConstruct"(%22291) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%22305 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22306 = "torch.aten.index_put"(%22303, %22304, %22289, %22305) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22306, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22307 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22308 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22309 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22310 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22311 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22312 = "torch.prim.ListConstruct"(%18479, %22307, %22308, %22309, %22310, %22311) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22313 = "torch.aten.view"(%22306, %22312) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22313, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22314 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%22315 = "torch.prim.ListConstruct"(%18479, %22314) : (!torch.int, !torch.int) -> !torch.list<int>
%22316 = "torch.aten.view"(%22313, %22315) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22316, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%22317 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22318 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22319 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22320 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22321 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22322 = "torch.prim.ListConstruct"(%18479, %22317, %22318, %22319, %22320, %22321) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22323 = "torch.aten.view"(%22316, %22322) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22323, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22324 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22325 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22326 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22327 = "torch.prim.ListConstruct"(%18993, %22324, %22325, %22326) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22328 = "torch.aten.view"(%22323, %22327) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22328, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22329 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22330 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22331 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22332 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22333 = "torch.prim.ListConstruct"(%22329, %18477, %22330, %22331, %22332) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22334 = "torch.aten.view"(%21973, %22333) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22334, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22335 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22336 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22337 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22338 = "torch.prim.ListConstruct"(%19011, %22335, %22336, %22337) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22339 = "torch.aten.view"(%22334, %22338) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22339, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22340 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22341 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22342 = "torch.aten.add.Scalar"(%22278, %22340, %22341) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%22342, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%22343 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%22344 = "torch.aten.view"(%22342, %22343) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%22344, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%22345 = "torch.prim.ListConstruct"(%22344) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%22346 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22347 = "torch.aten.index_put"(%22328, %22345, %22339, %22346) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22347, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22348 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22349 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22350 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22351 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22352 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22353 = "torch.prim.ListConstruct"(%18479, %22348, %22349, %22350, %22351, %22352) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22354 = "torch.aten.view"(%22347, %22353) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22354, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22355 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%22356 = "torch.prim.ListConstruct"(%18479, %22355) : (!torch.int, !torch.int) -> !torch.list<int>
%22357 = "torch.aten.view"(%22354, %22356) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22357, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%22358 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%22359 = "torch.aten.unsqueeze"(%22273, %22358) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22359, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%22360 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22361 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22362 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22363 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22364 = "torch.prim.ListConstruct"(%22360, %18481, %22361, %22362, %22363) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22365 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22366 = "torch.aten.expand"(%22359, %22364, %22365) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22366, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%22367 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22368 = "torch.aten.clone"(%22366, %22367) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22368, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%22369 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22370 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22371 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22372 = "torch.prim.ListConstruct"(%22369, %18481, %22370, %22371) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22373 = "torch.aten._unsafe_view"(%22368, %22372) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22373, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22374 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%22375 = "torch.aten.unsqueeze"(%21973, %22374) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22375, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%22376 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22377 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22378 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22379 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22380 = "torch.prim.ListConstruct"(%22376, %18481, %22377, %22378, %22379) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22381 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22382 = "torch.aten.expand"(%22375, %22380, %22381) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22382, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%22383 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22384 = "torch.aten.clone"(%22382, %22383) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22384, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%22385 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22386 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22387 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22388 = "torch.prim.ListConstruct"(%22385, %18481, %22386, %22387) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22389 = "torch.aten._unsafe_view"(%22384, %22388) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22389, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22390 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22391 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22392 = "torch.aten.transpose.int"(%22123, %22390, %22391) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22392, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%22393 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22394 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22395 = "torch.aten.transpose.int"(%22373, %22393, %22394) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22395, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%22396 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22397 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22398 = "torch.aten.transpose.int"(%22389, %22396, %22397) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int) -> !torch.vtensor<[4,32,?,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22398, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>, !torch.int) -> ()
%22399 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22400 = "torch.aten.squeeze.dim"(%18570, %22399) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22400, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%22401 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22402 = "torch.aten.squeeze.dim"(%22400, %22401) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,1,?,?],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22402, %18474) <{shape_expressions = #map8}> : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>, !torch.int) -> ()
%22403 = "torch_c.to_builtin_tensor"(%22392) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%22404 = "torch_c.to_builtin_tensor"(%22395) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%22405 = "torch_c.to_builtin_tensor"(%22398) : (!torch.vtensor<[4,32,?,128],f8E4M3FNUZ>) -> tensor<4x32x?x128xf8E4M3FNUZ>
%22406 = "torch_c.to_builtin_tensor"(%22402) : (!torch.vtensor<[4,1,?,?],f8E4M3FNUZ>) -> tensor<4x1x?x?xf8E4M3FNUZ>
%22407 = "tensor.cast"(%22406) : (tensor<4x1x?x?xf8E4M3FNUZ>) -> tensor<?x?x?x?xf8E4M3FNUZ>
%22408 = "torch_c.to_builtin_tensor"(%17409) : (!torch.vtensor<[],f32>) -> tensor<f32>
%22409 = "util.call"(%22403, %22404, %22405, %22408, %22407) <{callee = @sharktank_masked_flash_attention_4_32_128_128_f8E4M3FNUZ_f32_f32}> : (tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<4x32x?x128xf8E4M3FNUZ>, tensor<f32>, tensor<?x?x?x?xf8E4M3FNUZ>) -> tensor<4x32x?x128xf32>
%22410 = "torch_c.from_builtin_tensor"(%22409) : (tensor<4x32x?x128xf32>) -> !torch.vtensor<[4,32,?,128],f32>
"torch.bind_symbolic_shape"(%22410, %18474) <{shape_expressions = #map27}> : (!torch.vtensor<[4,32,?,128],f32>, !torch.int) -> ()
%22411 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22412 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22413 = "torch.aten.transpose.int"(%22410, %22411, %22412) : (!torch.vtensor<[4,32,?,128],f32>, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%22413, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%22414 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22415 = "torch.aten.clone"(%22413, %22414) : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> !torch.vtensor<[4,?,32,128],f32>
"torch.bind_symbolic_shape"(%22415, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f32>, !torch.int) -> ()
%22416 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22417 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%22418 = "torch.prim.ListConstruct"(%22416, %18481, %22417) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22419 = "torch.aten._unsafe_view"(%22415, %22418) : (!torch.vtensor<[4,?,32,128],f32>, !torch.list<int>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22419, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22420 = "torch.aten.div.Tensor"(%22419, %17411) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22420, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22421 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%22422 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%22423 = "torch.aten.clamp"(%22420, %22421, %22422) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22423, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22424 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%22425 = "torch.prims.convert_element_type"(%22423, %22424) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22425, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%22426 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22427 = "torch.aten.unsqueeze"(%17413, %22426) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%22428 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22429 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%22430 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%22431 = "torch.prim.ListConstruct"(%22428, %22429, %22430) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22432 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22433 = "torch.aten.expand"(%22427, %22431, %22432) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%22434 = "torch_c.to_builtin_tensor"(%22425) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%22435 = "torch_c.to_builtin_tensor"(%22433) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%22436 = "util.call"(%22434, %22435) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%22437 = "torch_c.from_builtin_tensor"(%22436) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22437, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22438 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22439 = "torch.prims.convert_element_type"(%22437, %22438) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22439, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22440 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22441 = "torch.aten.add.Tensor"(%21867, %22439, %22440) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22441, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22442 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22443 = "torch.prims.convert_element_type"(%22441, %22442) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22443, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22444 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22445 = "torch.aten.pow.Tensor_Scalar"(%22443, %22444) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22445, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22446 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%22447 = "torch.prim.ListConstruct"(%22446) : (!torch.int) -> !torch.list<int>
%22448 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%22449 = "torch.constant.none"() : () -> !torch.none
%22450 = "torch.aten.mean.dim"(%22445, %22447, %22448, %22449) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%22450, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%22451 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%22452 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22453 = "torch.aten.add.Scalar"(%22450, %22451, %22452) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%22453, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%22454 = "torch.aten.rsqrt"(%22453) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%22454, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%22455 = "torch.aten.mul.Tensor"(%22443, %22454) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22455, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22456 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22457 = "torch.prims.convert_element_type"(%22455, %22456) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22457, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22458 = "torch.aten.mul.Tensor"(%17415, %22457) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22458, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22459 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22460 = "torch.prims.convert_element_type"(%22458, %22459) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22460, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22461 = "torch.aten.div.Tensor"(%22460, %17417) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22461, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22462 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%22463 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%22464 = "torch.aten.clamp"(%22461, %22462, %22463) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22464, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22465 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%22466 = "torch.prims.convert_element_type"(%22464, %22465) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22466, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%22467 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22468 = "torch.aten.unsqueeze"(%17419, %22467) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%22469 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22470 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%22471 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%22472 = "torch.prim.ListConstruct"(%22469, %22470, %22471) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22473 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22474 = "torch.aten.expand"(%22468, %22472, %22473) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%22475 = "torch_c.to_builtin_tensor"(%22466) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%22476 = "torch_c.to_builtin_tensor"(%22474) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%22477 = "util.call"(%22475, %22476) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%22478 = "torch_c.from_builtin_tensor"(%22477) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%22478, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%22479 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22480 = "torch.prims.convert_element_type"(%22478, %22479) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%22480, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%22481 = "torch.aten.silu"(%22480) : (!torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%22481, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%22482 = "torch.aten.div.Tensor"(%22460, %17421) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22482, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22483 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%22484 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%22485 = "torch.aten.clamp"(%22482, %22483, %22484) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22485, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22486 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%22487 = "torch.prims.convert_element_type"(%22485, %22486) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22487, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%22488 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22489 = "torch.aten.unsqueeze"(%17423, %22488) : (!torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,14336,4096],f8E4M3FNUZ>
%22490 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22491 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%22492 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%22493 = "torch.prim.ListConstruct"(%22490, %22491, %22492) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22494 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22495 = "torch.aten.expand"(%22489, %22493, %22494) : (!torch.vtensor<[1,14336,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,14336,4096],f8E4M3FNUZ>
%22496 = "torch_c.to_builtin_tensor"(%22487) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%22497 = "torch_c.to_builtin_tensor"(%22495) : (!torch.vtensor<[4,14336,4096],f8E4M3FNUZ>) -> tensor<4x14336x4096xf8E4M3FNUZ>
%22498 = "util.call"(%22496, %22497) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x14336x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x14336x4096xf8E4M3FNUZ>) -> tensor<4x?x14336xf32>
%22499 = "torch_c.from_builtin_tensor"(%22498) : (tensor<4x?x14336xf32>) -> !torch.vtensor<[4,?,14336],f32>
"torch.bind_symbolic_shape"(%22499, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> ()
%22500 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22501 = "torch.prims.convert_element_type"(%22499, %22500) : (!torch.vtensor<[4,?,14336],f32>, !torch.int) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%22501, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%22502 = "torch.aten.mul.Tensor"(%22481, %22501) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[4,?,14336],bf16>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%22502, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%22503 = "torch.aten.div.Tensor"(%22502, %17425) : (!torch.vtensor<[4,?,14336],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%22503, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%22504 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%22505 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%22506 = "torch.aten.clamp"(%22503, %22504, %22505) : (!torch.vtensor<[4,?,14336],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,14336],bf16>
"torch.bind_symbolic_shape"(%22506, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> ()
%22507 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%22508 = "torch.prims.convert_element_type"(%22506, %22507) : (!torch.vtensor<[4,?,14336],bf16>, !torch.int) -> !torch.vtensor<[4,?,14336],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22508, %18474) <{shape_expressions = #map28}> : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>, !torch.int) -> ()
%22509 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22510 = "torch.aten.unsqueeze"(%17427, %22509) : (!torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,14336],f8E4M3FNUZ>
%22511 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22512 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%22513 = "torch.constant.int"() <{value = 14336 : i64}> : () -> !torch.int
%22514 = "torch.prim.ListConstruct"(%22511, %22512, %22513) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22515 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22516 = "torch.aten.expand"(%22510, %22514, %22515) : (!torch.vtensor<[1,4096,14336],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,14336],f8E4M3FNUZ>
%22517 = "torch_c.to_builtin_tensor"(%22508) : (!torch.vtensor<[4,?,14336],f8E4M3FNUZ>) -> tensor<4x?x14336xf8E4M3FNUZ>
%22518 = "torch_c.to_builtin_tensor"(%22516) : (!torch.vtensor<[4,4096,14336],f8E4M3FNUZ>) -> tensor<4x4096x14336xf8E4M3FNUZ>
%22519 = "util.call"(%22517, %22518) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx14336xf8E4M3FNUZ_R4x4096x14336xf8E4M3FNUZ}> : (tensor<4x?x14336xf8E4M3FNUZ>, tensor<4x4096x14336xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%22520 = "torch_c.from_builtin_tensor"(%22519) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22520, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22521 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22522 = "torch.prims.convert_element_type"(%22520, %22521) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22522, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22523 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22524 = "torch.aten.add.Tensor"(%22441, %22522, %22523) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22524, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22525 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22526 = "torch.prims.convert_element_type"(%22524, %22525) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22526, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22527 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22528 = "torch.aten.pow.Tensor_Scalar"(%22526, %22527) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22528, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22529 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%22530 = "torch.prim.ListConstruct"(%22529) : (!torch.int) -> !torch.list<int>
%22531 = "torch.constant.bool"() <{value = true}> : () -> !torch.bool
%22532 = "torch.constant.none"() : () -> !torch.none
%22533 = "torch.aten.mean.dim"(%22528, %22530, %22531, %22532) : (!torch.vtensor<[4,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%22533, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%22534 = "torch.constant.float"() <{value = 1.000000e-05 : f64}> : () -> !torch.float
%22535 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22536 = "torch.aten.add.Scalar"(%22533, %22534, %22535) : (!torch.vtensor<[4,?,1],f32>, !torch.float, !torch.int) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%22536, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%22537 = "torch.aten.rsqrt"(%22536) : (!torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,1],f32>
"torch.bind_symbolic_shape"(%22537, %18474) <{shape_expressions = #map10}> : (!torch.vtensor<[4,?,1],f32>, !torch.int) -> ()
%22538 = "torch.aten.mul.Tensor"(%22526, %22537) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[4,?,1],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22538, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22539 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22540 = "torch.prims.convert_element_type"(%22538, %22539) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22540, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22541 = "torch.aten.mul.Tensor"(%17429, %22540) : (!torch.vtensor<[4096],bf16>, !torch.vtensor<[4,?,4096],bf16>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22541, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22542 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22543 = "torch.prims.convert_element_type"(%22541, %22542) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22543, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22544 = "torch.aten.div.Tensor"(%22543, %17431) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22544, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22545 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%22546 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%22547 = "torch.aten.clamp"(%22544, %22545, %22546) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22547, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22548 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%22549 = "torch.prims.convert_element_type"(%22547, %22548) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22549, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%22550 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22551 = "torch.aten.unsqueeze"(%17433, %22550) : (!torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,4096,4096],f8E4M3FNUZ>
%22552 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22553 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%22554 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%22555 = "torch.prim.ListConstruct"(%22552, %22553, %22554) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22556 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22557 = "torch.aten.expand"(%22551, %22555, %22556) : (!torch.vtensor<[1,4096,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,4096,4096],f8E4M3FNUZ>
%22558 = "torch_c.to_builtin_tensor"(%22549) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%22559 = "torch_c.to_builtin_tensor"(%22557) : (!torch.vtensor<[4,4096,4096],f8E4M3FNUZ>) -> tensor<4x4096x4096xf8E4M3FNUZ>
%22560 = "util.call"(%22558, %22559) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x4096x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x4096x4096xf8E4M3FNUZ>) -> tensor<4x?x4096xf32>
%22561 = "torch_c.from_builtin_tensor"(%22560) : (tensor<4x?x4096xf32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22561, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22562 = "torch.aten.div.Tensor"(%22561, %17435) : (!torch.vtensor<[4,?,4096],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22562, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22563 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%22564 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%22565 = "torch.aten.clamp"(%22562, %22563, %22564) : (!torch.vtensor<[4,?,4096],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],f32>
"torch.bind_symbolic_shape"(%22565, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> ()
%22566 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%22567 = "torch.prims.convert_element_type"(%22565, %22566) : (!torch.vtensor<[4,?,4096],f32>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22567, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%22568 = "torch.aten.div.Tensor"(%22543, %17437) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22568, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22569 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%22570 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%22571 = "torch.aten.clamp"(%22568, %22569, %22570) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22571, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22572 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%22573 = "torch.prims.convert_element_type"(%22571, %22572) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22573, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%22574 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22575 = "torch.aten.unsqueeze"(%17439, %22574) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%22576 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22577 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%22578 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%22579 = "torch.prim.ListConstruct"(%22576, %22577, %22578) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22580 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22581 = "torch.aten.expand"(%22575, %22579, %22580) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%22582 = "torch_c.to_builtin_tensor"(%22573) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%22583 = "torch_c.to_builtin_tensor"(%22581) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%22584 = "util.call"(%22582, %22583) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%22585 = "torch_c.from_builtin_tensor"(%22584) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%22585, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%22586 = "torch.aten.div.Tensor"(%22585, %17441) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%22586, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%22587 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%22588 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%22589 = "torch.aten.clamp"(%22586, %22587, %22588) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%22589, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%22590 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%22591 = "torch.prims.convert_element_type"(%22589, %22590) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22591, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%22592 = "torch.aten.div.Tensor"(%22543, %17443) : (!torch.vtensor<[4,?,4096],bf16>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22592, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22593 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%22594 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%22595 = "torch.aten.clamp"(%22592, %22593, %22594) : (!torch.vtensor<[4,?,4096],bf16>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,4096],bf16>
"torch.bind_symbolic_shape"(%22595, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> ()
%22596 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%22597 = "torch.prims.convert_element_type"(%22595, %22596) : (!torch.vtensor<[4,?,4096],bf16>, !torch.int) -> !torch.vtensor<[4,?,4096],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22597, %18474) <{shape_expressions = #map9}> : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.int) -> ()
%22598 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22599 = "torch.aten.unsqueeze"(%17445, %22598) : (!torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[1,1024,4096],f8E4M3FNUZ>
%22600 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22601 = "torch.constant.int"() <{value = 1024 : i64}> : () -> !torch.int
%22602 = "torch.constant.int"() <{value = 4096 : i64}> : () -> !torch.int
%22603 = "torch.prim.ListConstruct"(%22600, %22601, %22602) : (!torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22604 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22605 = "torch.aten.expand"(%22599, %22603, %22604) : (!torch.vtensor<[1,1024,4096],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,1024,4096],f8E4M3FNUZ>
%22606 = "torch_c.to_builtin_tensor"(%22597) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>) -> tensor<4x?x4096xf8E4M3FNUZ>
%22607 = "torch_c.to_builtin_tensor"(%22605) : (!torch.vtensor<[4,1024,4096],f8E4M3FNUZ>) -> tensor<4x1024x4096xf8E4M3FNUZ>
%22608 = "util.call"(%22606, %22607) <{callee = @sharktank_batch_matmul_transpose_b_L4xDx4096xf8E4M3FNUZ_R4x1024x4096xf8E4M3FNUZ}> : (tensor<4x?x4096xf8E4M3FNUZ>, tensor<4x1024x4096xf8E4M3FNUZ>) -> tensor<4x?x1024xf32>
%22609 = "torch_c.from_builtin_tensor"(%22608) : (tensor<4x?x1024xf32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%22609, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%22610 = "torch.aten.div.Tensor"(%22609, %17447) : (!torch.vtensor<[4,?,1024],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%22610, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%22611 = "torch.constant.float"() <{value = -2.400000e+02 : f64}> : () -> !torch.float
%22612 = "torch.constant.float"() <{value = 2.400000e+02 : f64}> : () -> !torch.float
%22613 = "torch.aten.clamp"(%22610, %22611, %22612) : (!torch.vtensor<[4,?,1024],f32>, !torch.float, !torch.float) -> !torch.vtensor<[4,?,1024],f32>
"torch.bind_symbolic_shape"(%22613, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> ()
%22614 = "torch.constant.int"() <{value = 26 : i64}> : () -> !torch.int
%22615 = "torch.prims.convert_element_type"(%22613, %22614) : (!torch.vtensor<[4,?,1024],f32>, !torch.int) -> !torch.vtensor<[4,?,1024],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22615, %18474) <{shape_expressions = #map11}> : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.int) -> ()
%22616 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22617 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22618 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22619 = "torch.prim.ListConstruct"(%22616, %18481, %22617, %22618) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22620 = "torch.aten.view"(%22567, %22619) : (!torch.vtensor<[4,?,4096],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22620, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22621 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22622 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22623 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22624 = "torch.prim.ListConstruct"(%22621, %18481, %22622, %22623) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22625 = "torch.aten.view"(%22591, %22624) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22625, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22626 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22627 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22628 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22629 = "torch.prim.ListConstruct"(%22626, %18481, %22627, %22628) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22630 = "torch.aten.view"(%22615, %22629) : (!torch.vtensor<[4,?,1024],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22630, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22631 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%22632 = "torch.constant.none"() : () -> !torch.none
%22633 = "torch.constant.none"() : () -> !torch.none
%22634 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%22635 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22636 = "torch.aten.arange"(%22631, %22632, %22633, %22634, %22635) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%22637 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22638 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22639 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22640 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22641 = "torch.constant.none"() : () -> !torch.none
%22642 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%22643 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22644 = "torch.aten.arange.start_step"(%22637, %22638, %22639, %22640, %22641, %22642, %22643) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%22645 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22646 = "torch.prims.convert_element_type"(%22644, %22645) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%22647 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22648 = "torch.aten.div.Scalar"(%22646, %22647) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22649 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%22650 = "torch.aten.pow.Scalar"(%22649, %22648) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22651 = "torch.aten.reciprocal"(%22650) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22652 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%22653 = "torch.aten.mul.Scalar"(%22651, %22652) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%22654 = "torch.aten.reciprocal"(%22653) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22655 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%22656 = "torch.aten.mul.Scalar"(%22654, %22655) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%22657 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%22658 = "torch.aten.gt.Scalar"(%22656, %22657) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22659 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22660 = "torch.aten.div.Scalar"(%22653, %22659) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22661 = "torch.aten.where.self"(%22658, %22660, %22653) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22662 = "torch.aten.reciprocal"(%22656) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22663 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%22664 = "torch.aten.mul.Scalar"(%22662, %22663) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22665 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22666 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22667 = "torch.aten.sub.Scalar"(%22664, %22665, %22666) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%22668 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22669 = "torch.aten.div.Scalar"(%22667, %22668) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22670 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22671 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22672 = "torch.aten.rsub.Scalar"(%22669, %22670, %22671) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%22673 = "torch.aten.mul.Tensor"(%22672, %22661) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22674 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22675 = "torch.aten.div.Scalar"(%22673, %22674) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22676 = "torch.aten.mul.Tensor"(%22669, %22661) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22677 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22678 = "torch.aten.add.Tensor"(%22675, %22676, %22677) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22679 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%22680 = "torch.aten.lt.Scalar"(%22656, %22679) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22681 = "torch.aten.bitwise_not"(%22680) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22682 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%22683 = "torch.aten.gt.Scalar"(%22656, %22682) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22684 = "torch.aten.bitwise_not"(%22683) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22685 = "torch.aten.mul.Tensor"(%22681, %22684) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22686 = "torch.aten.where.self"(%22685, %22678, %22661) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22687 = "torch.prim.ListConstruct"(%22686, %22686) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%22688 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%22689 = "torch.aten.cat"(%22687, %22688) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%22690 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22691 = "torch.prims.convert_element_type"(%22636, %22690) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%22692 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22693 = "torch.prims.convert_element_type"(%22689, %22692) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%22694 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%22695 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22696 = "torch.prim.ListConstruct"(%22694, %22695) : (!torch.int, !torch.int) -> !torch.list<int>
%22697 = "torch.aten.view"(%22691, %22696) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%22698 = "torch.aten.mul.Tensor"(%22697, %22693) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%22699 = "torch.aten.cos"(%22698) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%22700 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22701 = "torch.prims.convert_element_type"(%22699, %22700) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%22702 = "torch.aten.sin"(%22698) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%22703 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22704 = "torch.prims.convert_element_type"(%22702, %22703) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%22705 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22706 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22707 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22708 = "torch.aten.slice.Tensor"(%22701, %22705, %22706, %18481, %22707) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22708, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22709 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22710 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22711 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22712 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22713 = "torch.aten.slice.Tensor"(%22708, %22709, %22710, %22711, %22712) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22713, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22714 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22715 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22716 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22717 = "torch.aten.slice.Tensor"(%22704, %22714, %22715, %18481, %22716) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22717, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22718 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22719 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22720 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22721 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22722 = "torch.aten.slice.Tensor"(%22717, %22718, %22719, %22720, %22721) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22722, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22723 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22724 = "torch.aten.unsqueeze"(%22713, %22723) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22724, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22725 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22726 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22727 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22728 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22729 = "torch.aten.slice.Tensor"(%22724, %22725, %22726, %22727, %22728) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22729, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22730 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22731 = "torch.aten.unsqueeze"(%22729, %22730) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22731, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22732 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22733 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22734 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22735 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22736 = "torch.aten.slice.Tensor"(%22731, %22732, %22733, %22734, %22735) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22736, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22737 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22738 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22739 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22740 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22741 = "torch.prim.ListConstruct"(%22737, %22738, %22739, %22740) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22742 = "torch.aten.repeat"(%22736, %22741) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22742, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%22743 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22744 = "torch.aten.unsqueeze"(%22722, %22743) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22744, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22745 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22746 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22747 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22748 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22749 = "torch.aten.slice.Tensor"(%22744, %22745, %22746, %22747, %22748) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22749, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22750 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22751 = "torch.aten.unsqueeze"(%22749, %22750) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22751, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22752 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22753 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22754 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22755 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22756 = "torch.aten.slice.Tensor"(%22751, %22752, %22753, %22754, %22755) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22756, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22757 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22758 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22759 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22760 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22761 = "torch.prim.ListConstruct"(%22757, %22758, %22759, %22760) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22762 = "torch.aten.repeat"(%22756, %22761) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22762, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%22763 = "torch.aten.mul.Tensor"(%22620, %22742) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22763, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22764 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22765 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22766 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%22767 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22768 = "torch.aten.slice.Tensor"(%22620, %22764, %22765, %22766, %22767) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22768, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%22769 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22770 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%22771 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22772 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22773 = "torch.aten.slice.Tensor"(%22620, %22769, %22770, %22771, %22772) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22773, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%22774 = "torch.aten.neg"(%22773) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22774, %18474) <{shape_expressions = #map18}> : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.int) -> ()
%22775 = "torch.prim.ListConstruct"(%22774, %22768) : (!torch.vtensor<[4,?,32,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%22776 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%22777 = "torch.aten.cat"(%22775, %22776) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22777, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22778 = "torch.aten.mul.Tensor"(%22777, %22762) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22778, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22779 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22780 = "torch.aten.add.Tensor"(%22763, %22778, %22779) : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,32,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22780, %18474) <{shape_expressions = #map12}> : (!torch.vtensor<[4,?,32,128],f8E4M3FNUZ>, !torch.int) -> ()
%22781 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%22782 = "torch.constant.none"() : () -> !torch.none
%22783 = "torch.constant.none"() : () -> !torch.none
%22784 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%22785 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22786 = "torch.aten.arange"(%22781, %22782, %22783, %22784, %22785) : (!torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[131072],si64>
%22787 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22788 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22789 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22790 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22791 = "torch.constant.none"() : () -> !torch.none
%22792 = "torch.constant.device"() <{value = "cpu"}> : () -> !torch.Device
%22793 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22794 = "torch.aten.arange.start_step"(%22787, %22788, %22789, %22790, %22791, %22792, %22793) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool) -> !torch.vtensor<[64],si64>
%22795 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22796 = "torch.prims.convert_element_type"(%22794, %22795) : (!torch.vtensor<[64],si64>, !torch.int) -> !torch.vtensor<[64],f32>
%22797 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22798 = "torch.aten.div.Scalar"(%22796, %22797) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22799 = "torch.constant.float"() <{value = 5.000000e+05 : f64}> : () -> !torch.float
%22800 = "torch.aten.pow.Scalar"(%22799, %22798) : (!torch.float, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22801 = "torch.aten.reciprocal"(%22800) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22802 = "torch.constant.float"() <{value = 1.000000e+00 : f64}> : () -> !torch.float
%22803 = "torch.aten.mul.Scalar"(%22801, %22802) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%22804 = "torch.aten.reciprocal"(%22803) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22805 = "torch.constant.float"() <{value = 6.2831853071795862 : f64}> : () -> !torch.float
%22806 = "torch.aten.mul.Scalar"(%22804, %22805) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],f32>
%22807 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%22808 = "torch.aten.gt.Scalar"(%22806, %22807) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22809 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22810 = "torch.aten.div.Scalar"(%22803, %22809) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22811 = "torch.aten.where.self"(%22808, %22810, %22803) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22812 = "torch.aten.reciprocal"(%22806) : (!torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22813 = "torch.constant.int"() <{value = 8192 : i64}> : () -> !torch.int
%22814 = "torch.aten.mul.Scalar"(%22812, %22813) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22815 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22816 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22817 = "torch.aten.sub.Scalar"(%22814, %22815, %22816) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%22818 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22819 = "torch.aten.div.Scalar"(%22817, %22818) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22820 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22821 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22822 = "torch.aten.rsub.Scalar"(%22819, %22820, %22821) : (!torch.vtensor<[64],f32>, !torch.int, !torch.int) -> !torch.vtensor<[64],f32>
%22823 = "torch.aten.mul.Tensor"(%22822, %22811) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22824 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22825 = "torch.aten.div.Scalar"(%22823, %22824) : (!torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22826 = "torch.aten.mul.Tensor"(%22819, %22811) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22827 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22828 = "torch.aten.add.Tensor"(%22825, %22826, %22827) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int) -> !torch.vtensor<[64],f32>
%22829 = "torch.constant.float"() <{value = 2.048000e+03 : f64}> : () -> !torch.float
%22830 = "torch.aten.lt.Scalar"(%22806, %22829) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22831 = "torch.aten.bitwise_not"(%22830) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22832 = "torch.constant.float"() <{value = 8.192000e+03 : f64}> : () -> !torch.float
%22833 = "torch.aten.gt.Scalar"(%22806, %22832) : (!torch.vtensor<[64],f32>, !torch.float) -> !torch.vtensor<[64],i1>
%22834 = "torch.aten.bitwise_not"(%22833) : (!torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22835 = "torch.aten.mul.Tensor"(%22831, %22834) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],i1>) -> !torch.vtensor<[64],i1>
%22836 = "torch.aten.where.self"(%22835, %22828, %22811) : (!torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.vtensor<[64],f32>
%22837 = "torch.prim.ListConstruct"(%22836, %22836) : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor>
%22838 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%22839 = "torch.aten.cat"(%22837, %22838) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[128],f32>
%22840 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22841 = "torch.prims.convert_element_type"(%22786, %22840) : (!torch.vtensor<[131072],si64>, !torch.int) -> !torch.vtensor<[131072],f32>
%22842 = "torch.constant.int"() <{value = 6 : i64}> : () -> !torch.int
%22843 = "torch.prims.convert_element_type"(%22839, %22842) : (!torch.vtensor<[128],f32>, !torch.int) -> !torch.vtensor<[128],f32>
%22844 = "torch.constant.int"() <{value = 131072 : i64}> : () -> !torch.int
%22845 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22846 = "torch.prim.ListConstruct"(%22844, %22845) : (!torch.int, !torch.int) -> !torch.list<int>
%22847 = "torch.aten.view"(%22841, %22846) : (!torch.vtensor<[131072],f32>, !torch.list<int>) -> !torch.vtensor<[131072,1],f32>
%22848 = "torch.aten.mul.Tensor"(%22847, %22843) : (!torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32>) -> !torch.vtensor<[131072,128],f32>
%22849 = "torch.aten.cos"(%22848) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%22850 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22851 = "torch.prims.convert_element_type"(%22849, %22850) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%22852 = "torch.aten.sin"(%22848) : (!torch.vtensor<[131072,128],f32>) -> !torch.vtensor<[131072,128],f32>
%22853 = "torch.constant.int"() <{value = 15 : i64}> : () -> !torch.int
%22854 = "torch.prims.convert_element_type"(%22852, %22853) : (!torch.vtensor<[131072,128],f32>, !torch.int) -> !torch.vtensor<[131072,128],bf16>
%22855 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22856 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22857 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22858 = "torch.aten.slice.Tensor"(%22851, %22855, %22856, %18481, %22857) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22858, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22859 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22860 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22861 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22862 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22863 = "torch.aten.slice.Tensor"(%22858, %22859, %22860, %22861, %22862) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22863, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22864 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22865 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22866 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22867 = "torch.aten.slice.Tensor"(%22854, %22864, %22865, %18481, %22866) : (!torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22867, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22868 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22869 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22870 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22871 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22872 = "torch.aten.slice.Tensor"(%22867, %22868, %22869, %22870, %22871) : (!torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[?,128],bf16>
"torch.bind_symbolic_shape"(%22872, %18474) <{shape_expressions = #map14}> : (!torch.vtensor<[?,128],bf16>, !torch.int) -> ()
%22873 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22874 = "torch.aten.unsqueeze"(%22863, %22873) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22874, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22875 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22876 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22877 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22878 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22879 = "torch.aten.slice.Tensor"(%22874, %22875, %22876, %22877, %22878) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22879, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22880 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22881 = "torch.aten.unsqueeze"(%22879, %22880) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22881, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22882 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22883 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22884 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22885 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22886 = "torch.aten.slice.Tensor"(%22881, %22882, %22883, %22884, %22885) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22886, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22887 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22888 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22889 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22890 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22891 = "torch.prim.ListConstruct"(%22887, %22888, %22889, %22890) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22892 = "torch.aten.repeat"(%22886, %22891) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22892, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%22893 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22894 = "torch.aten.unsqueeze"(%22872, %22893) : (!torch.vtensor<[?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22894, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22895 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22896 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22897 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22898 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22899 = "torch.aten.slice.Tensor"(%22894, %22895, %22896, %22897, %22898) : (!torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,128],bf16>
"torch.bind_symbolic_shape"(%22899, %18474) <{shape_expressions = #map15}> : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> ()
%22900 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22901 = "torch.aten.unsqueeze"(%22899, %22900) : (!torch.vtensor<[1,?,128],bf16>, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22901, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22902 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22903 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22904 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22905 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22906 = "torch.aten.slice.Tensor"(%22901, %22902, %22903, %22904, %22905) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[1,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22906, %18474) <{shape_expressions = #map16}> : (!torch.vtensor<[1,?,1,128],bf16>, !torch.int) -> ()
%22907 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22908 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22909 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22910 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22911 = "torch.prim.ListConstruct"(%22907, %22908, %22909, %22910) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22912 = "torch.aten.repeat"(%22906, %22911) : (!torch.vtensor<[1,?,1,128],bf16>, !torch.list<int>) -> !torch.vtensor<[4,?,1,128],bf16>
"torch.bind_symbolic_shape"(%22912, %18474) <{shape_expressions = #map17}> : (!torch.vtensor<[4,?,1,128],bf16>, !torch.int) -> ()
%22913 = "torch.aten.mul.Tensor"(%22625, %22892) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22913, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22914 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22915 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%22916 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%22917 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22918 = "torch.aten.slice.Tensor"(%22625, %22914, %22915, %22916, %22917) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22918, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%22919 = "torch.constant.int"() <{value = 3 : i64}> : () -> !torch.int
%22920 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%22921 = "torch.constant.int"() <{value = 9223372036854775807 : i64}> : () -> !torch.int
%22922 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22923 = "torch.aten.slice.Tensor"(%22625, %22919, %22920, %22921, %22922) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22923, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%22924 = "torch.aten.neg"(%22923) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22924, %18474) <{shape_expressions = #map19}> : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.int) -> ()
%22925 = "torch.prim.ListConstruct"(%22924, %22918) : (!torch.vtensor<[4,?,8,64],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,64],f8E4M3FNUZ>) -> !torch.list<vtensor>
%22926 = "torch.constant.int"() <{value = -1 : i64}> : () -> !torch.int
%22927 = "torch.aten.cat"(%22925, %22926) : (!torch.list<vtensor>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22927, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22928 = "torch.aten.mul.Tensor"(%22927, %22912) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,1,128],bf16>) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22928, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22929 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22930 = "torch.aten.add.Tensor"(%22913, %22928, %22929) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22930, %18474) <{shape_expressions = #map13}> : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22931 = "torch.constant.int"() <{value = 64 : i64}> : () -> !torch.int
%22932 = "torch.aten.mul.Scalar"(%arg69, %22931) : (!torch.vtensor<[4,?],si64>, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%22932, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%22933 = "torch.constant.int"() <{value = 12 : i64}> : () -> !torch.int
%22934 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22935 = "torch.aten.add.Scalar"(%22932, %22933, %22934) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%22935, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%22936 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22937 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22938 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22939 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22940 = "torch.prim.ListConstruct"(%22936, %18477, %22937, %22938, %22939) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22941 = "torch.aten.view"(%22930, %22940) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22941, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22942 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22943 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22944 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22945 = "torch.prim.ListConstruct"(%19011, %22942, %22943, %22944) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22946 = "torch.aten.view"(%22941, %22945) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22946, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22947 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%22948 = "torch.aten.view"(%22935, %22947) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%22948, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%22949 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22950 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22951 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22952 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22953 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22954 = "torch.prim.ListConstruct"(%18479, %22949, %22950, %22951, %22952, %22953) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22955 = "torch.aten.view"(%22357, %22954) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22955, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22956 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22957 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22958 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22959 = "torch.prim.ListConstruct"(%18993, %22956, %22957, %22958) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22960 = "torch.aten.view"(%22955, %22959) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22960, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22961 = "torch.prim.ListConstruct"(%22948) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%22962 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%22963 = "torch.aten.index_put"(%22960, %22961, %22946, %22962) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22963, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22964 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22965 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22966 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22967 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22968 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22969 = "torch.prim.ListConstruct"(%18479, %22964, %22965, %22966, %22967, %22968) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22970 = "torch.aten.view"(%22963, %22969) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22970, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22971 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%22972 = "torch.prim.ListConstruct"(%18479, %22971) : (!torch.int, !torch.int) -> !torch.list<int>
%22973 = "torch.aten.view"(%22970, %22972) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22973, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%22974 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22975 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%22976 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22977 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22978 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22979 = "torch.prim.ListConstruct"(%18479, %22974, %22975, %22976, %22977, %22978) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22980 = "torch.aten.view"(%22973, %22979) : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22980, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22981 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22982 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22983 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22984 = "torch.prim.ListConstruct"(%18993, %22981, %22982, %22983) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22985 = "torch.aten.view"(%22980, %22984) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22985, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22986 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%22987 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22988 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22989 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22990 = "torch.prim.ListConstruct"(%22986, %18477, %22987, %22988, %22989) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22991 = "torch.aten.view"(%22630, %22990) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22991, %18474) <{shape_expressions = #map22}> : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22992 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%22993 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%22994 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%22995 = "torch.prim.ListConstruct"(%19011, %22992, %22993, %22994) : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%22996 = "torch.aten.view"(%22991, %22995) : (!torch.vtensor<[4,?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%22996, %18474) <{shape_expressions = #map23}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%22997 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22998 = "torch.constant.int"() <{value = 1 : i64}> : () -> !torch.int
%22999 = "torch.aten.add.Scalar"(%22935, %22997, %22998) : (!torch.vtensor<[4,?],si64>, !torch.int, !torch.int) -> !torch.vtensor<[4,?],si64>
"torch.bind_symbolic_shape"(%22999, %18474) <{shape_expressions = #map1}> : (!torch.vtensor<[4,?],si64>, !torch.int) -> ()
%23000 = "torch.prim.ListConstruct"(%19011) : (!torch.int) -> !torch.list<int>
%23001 = "torch.aten.view"(%22999, %23000) : (!torch.vtensor<[4,?],si64>, !torch.list<int>) -> !torch.vtensor<[?],si64>
"torch.bind_symbolic_shape"(%23001, %18474) <{shape_expressions = #map24}> : (!torch.vtensor<[?],si64>, !torch.int) -> ()
%23002 = "torch.prim.ListConstruct"(%23001) : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>>
%23003 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%23004 = "torch.aten.index_put"(%22985, %23002, %22996, %23003) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool) -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%23004, %18475) <{shape_expressions = #map21}> : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%23005 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%23006 = "torch.constant.int"() <{value = 2 : i64}> : () -> !torch.int
%23007 = "torch.constant.int"() <{value = 32 : i64}> : () -> !torch.int
%23008 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%23009 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%23010 = "torch.prim.ListConstruct"(%18479, %23005, %23006, %23007, %23008, %23009) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%23011 = "torch.aten.view"(%23004, %23010) : (!torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%23011, %18475) <{shape_expressions = #map20}> : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.int) -> ()
%23012 = "torch.constant.int"() <{value = 2097152 : i64}> : () -> !torch.int
%23013 = "torch.prim.ListConstruct"(%18479, %23012) : (!torch.int, !torch.int) -> !torch.list<int>
%23014 = "torch.aten.view"(%23011, %23013) : (!torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int>) -> !torch.vtensor<[?,2097152],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%23014, %18475) <{shape_expressions = #map2}> : (!torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int) -> ()
%23015 = "torch.constant.int"() <{value = -2 : i64}> : () -> !torch.int
%23016 = "torch.aten.unsqueeze"(%22930, %23015) : (!torch.vtensor<[4,?,8,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%23016, %18474) <{shape_expressions = #map25}> : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.int) -> ()
%23017 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%23018 = "torch.constant.int"() <{value = 8 : i64}> : () -> !torch.int
%23019 = "torch.constant.int"() <{value = 4 : i64}> : () -> !torch.int
%23020 = "torch.constant.int"() <{value = 128 : i64}> : () -> !torch.int
%23021 = "torch.prim.ListConstruct"(%23017, %18481, %23018, %23019, %23020) : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int>
%23022 = "torch.constant.bool"() <{value = false}> : () -> !torch.bool
%23023 = "torch.aten.expand"(%23016, %23021, %23022) : (!torch.vtensor<[4,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%23023, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%23024 = "torch.constant.int"() <{value = 0 : i64}> : () -> !torch.int
%23025 = "torch.aten.clone"(%23023, %23024) : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> !torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>
"torch.bind_symbolic_shape"(%23025, %18474) <{shape_expressions = #map26}> : (!torch.vtensor<[4,?,8,4,128],f8E4M3FNUZ>, !torch.int) -> ()
%23026 = "t