Created
January 23, 2025 16:56
-
-
Save AmosLewis/3da26fd016b968cfebc98b309f5d5845 to your computer and use it in GitHub Desktop.
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module @module { | |
util.global private @__auto.token_embd.weight = #stream.parameter.named<"model"::"token_embd.weight"> : tensor<128256x4096xbf16> | |
util.global private @__auto.blk.0.attn_norm.weight = #stream.parameter.named<"model"::"blk.0.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.0.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.0.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.0.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.0.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.0.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.0.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.0.ffn_norm.weight = #stream.parameter.named<"model"::"blk.0.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.0.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.0.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.0.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.0.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.0.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.0.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.0.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.0.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.1.attn_norm.weight = #stream.parameter.named<"model"::"blk.1.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.1.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.1.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.1.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.1.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.1.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.1.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.1.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.1.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.1.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.1.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.1.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.1.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.1.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.1.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.1.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.1.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.1.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.1.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.1.ffn_norm.weight = #stream.parameter.named<"model"::"blk.1.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.1.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.1.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.1.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.1.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.1.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.1.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.1.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.1.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.1.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.1.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.1.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.1.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.2.attn_norm.weight = #stream.parameter.named<"model"::"blk.2.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.2.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.2.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.2.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.2.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.2.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.2.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.2.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.2.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.2.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.2.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.2.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.2.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.2.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.2.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.2.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.2.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.2.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.2.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.2.ffn_norm.weight = #stream.parameter.named<"model"::"blk.2.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.2.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.2.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.2.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.2.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.2.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.2.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.2.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.2.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.2.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.2.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.2.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.2.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.3.attn_norm.weight = #stream.parameter.named<"model"::"blk.3.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.3.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.3.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.3.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.3.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.3.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.3.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.3.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.3.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.3.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.3.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.3.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.3.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.3.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.3.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.3.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.3.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.3.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.3.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.3.ffn_norm.weight = #stream.parameter.named<"model"::"blk.3.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.3.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.3.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.3.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.3.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.3.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.3.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.3.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.3.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.3.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.3.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.3.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.3.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.4.attn_norm.weight = #stream.parameter.named<"model"::"blk.4.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.4.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.4.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.4.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.4.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.4.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.4.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.4.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.4.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.4.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.4.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.4.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.4.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.4.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.4.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.4.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.4.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.4.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.4.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.4.ffn_norm.weight = #stream.parameter.named<"model"::"blk.4.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.4.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.4.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.4.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.4.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.4.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.4.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.4.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.4.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.4.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.4.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.4.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.4.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.5.attn_norm.weight = #stream.parameter.named<"model"::"blk.5.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.5.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.5.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.5.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.5.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.5.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.5.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.5.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.5.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.5.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.5.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.5.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.5.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.5.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.5.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.5.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.5.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.5.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.5.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.5.ffn_norm.weight = #stream.parameter.named<"model"::"blk.5.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.5.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.5.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.5.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.5.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.5.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.5.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.5.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.5.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.5.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.5.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.5.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.5.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.6.attn_norm.weight = #stream.parameter.named<"model"::"blk.6.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.6.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.6.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.6.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.6.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.6.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.6.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.6.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.6.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.6.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.6.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.6.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.6.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.6.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.6.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.6.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.6.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.6.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.6.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.6.ffn_norm.weight = #stream.parameter.named<"model"::"blk.6.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.6.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.6.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.6.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.6.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.6.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.6.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.6.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.6.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.6.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.6.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.6.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.6.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.7.attn_norm.weight = #stream.parameter.named<"model"::"blk.7.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.7.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.7.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.7.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.7.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.7.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.7.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.7.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.7.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.7.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.7.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.7.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.7.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.7.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.7.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.7.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.7.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.7.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.7.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.7.ffn_norm.weight = #stream.parameter.named<"model"::"blk.7.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.7.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.7.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.7.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.7.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.7.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.7.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.7.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.7.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.7.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.7.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.7.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.7.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.8.attn_norm.weight = #stream.parameter.named<"model"::"blk.8.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.8.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.8.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.8.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.8.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.8.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.8.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.8.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.8.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.8.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.8.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.8.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.8.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.8.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.8.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.8.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.8.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.8.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.8.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.8.ffn_norm.weight = #stream.parameter.named<"model"::"blk.8.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.8.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.8.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.8.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.8.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.8.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.8.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.8.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.8.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.8.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.8.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.8.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.8.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.9.attn_norm.weight = #stream.parameter.named<"model"::"blk.9.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.9.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.9.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.9.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.9.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.9.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.9.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.9.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.9.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.9.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.9.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.9.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.9.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.9.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.9.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.9.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.9.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.9.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.9.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.9.ffn_norm.weight = #stream.parameter.named<"model"::"blk.9.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.9.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.9.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.9.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.9.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.9.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.9.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.9.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.9.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.9.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.9.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.9.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.9.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.10.attn_norm.weight = #stream.parameter.named<"model"::"blk.10.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.10.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.10.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.10.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.10.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.10.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.10.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.10.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.10.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.10.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.10.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.10.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.10.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.10.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.10.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.10.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.10.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.10.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.10.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.10.ffn_norm.weight = #stream.parameter.named<"model"::"blk.10.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.10.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.10.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.10.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.10.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.10.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.10.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.10.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.10.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.10.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.10.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.10.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.10.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.11.attn_norm.weight = #stream.parameter.named<"model"::"blk.11.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.11.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.11.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.11.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.11.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.11.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.11.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.11.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.11.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.11.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.11.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.11.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.11.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.11.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.11.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.11.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.11.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.11.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.11.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.11.ffn_norm.weight = #stream.parameter.named<"model"::"blk.11.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.11.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.11.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.11.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.11.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.11.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.11.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.11.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.11.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.11.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.11.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.11.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.11.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.12.attn_norm.weight = #stream.parameter.named<"model"::"blk.12.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.12.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.12.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.12.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.12.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.12.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.12.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.12.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.12.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.12.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.12.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.12.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.12.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.12.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.12.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.12.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.12.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.12.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.12.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.12.ffn_norm.weight = #stream.parameter.named<"model"::"blk.12.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.12.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.12.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.12.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.12.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.12.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.12.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.12.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.12.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.12.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.12.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.12.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.12.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.13.attn_norm.weight = #stream.parameter.named<"model"::"blk.13.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.13.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.13.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.13.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.13.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.13.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.13.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.13.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.13.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.13.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.13.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.13.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.13.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.13.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.13.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.13.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.13.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.13.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.13.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.13.ffn_norm.weight = #stream.parameter.named<"model"::"blk.13.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.13.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.13.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.13.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.13.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.13.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.13.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.13.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.13.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.13.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.13.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.13.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.13.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.14.attn_norm.weight = #stream.parameter.named<"model"::"blk.14.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.14.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.14.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.14.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.14.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.14.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.14.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.14.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.14.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.14.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.14.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.14.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.14.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.14.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.14.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.14.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.14.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.14.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.14.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.14.ffn_norm.weight = #stream.parameter.named<"model"::"blk.14.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.14.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.14.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.14.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.14.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.14.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.14.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.14.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.14.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.14.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.14.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.14.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.14.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.15.attn_norm.weight = #stream.parameter.named<"model"::"blk.15.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.15.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.15.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.15.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.15.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.15.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.15.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.15.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.15.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.15.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.15.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.15.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.15.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.15.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.15.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.15.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.15.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.15.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.15.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.15.ffn_norm.weight = #stream.parameter.named<"model"::"blk.15.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.15.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.15.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.15.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.15.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.15.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.15.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.15.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.15.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.15.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.15.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.15.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.15.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.16.attn_norm.weight = #stream.parameter.named<"model"::"blk.16.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.16.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.16.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.16.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.16.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.16.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.16.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.16.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.16.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.16.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.16.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.16.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.16.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.16.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.16.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.16.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.16.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.16.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.16.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.16.ffn_norm.weight = #stream.parameter.named<"model"::"blk.16.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.16.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.16.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.16.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.16.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.16.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.16.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.16.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.16.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.16.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.16.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.16.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.16.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.17.attn_norm.weight = #stream.parameter.named<"model"::"blk.17.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.17.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.17.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.17.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.17.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.17.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.17.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.17.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.17.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.17.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.17.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.17.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.17.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.17.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.17.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.17.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.17.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.17.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.17.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.17.ffn_norm.weight = #stream.parameter.named<"model"::"blk.17.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.17.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.17.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.17.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.17.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.17.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.17.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.17.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.17.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.17.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.17.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.17.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.17.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.18.attn_norm.weight = #stream.parameter.named<"model"::"blk.18.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.18.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.18.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.18.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.18.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.18.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.18.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.18.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.18.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.18.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.18.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.18.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.18.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.18.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.18.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.18.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.18.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.18.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.18.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.18.ffn_norm.weight = #stream.parameter.named<"model"::"blk.18.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.18.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.18.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.18.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.18.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.18.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.18.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.18.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.18.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.18.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.18.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.18.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.18.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.19.attn_norm.weight = #stream.parameter.named<"model"::"blk.19.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.19.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.19.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.19.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.19.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.19.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.19.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.19.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.19.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.19.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.19.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.19.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.19.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.19.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.19.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.19.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.19.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.19.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.19.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.19.ffn_norm.weight = #stream.parameter.named<"model"::"blk.19.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.19.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.19.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.19.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.19.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.19.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.19.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.19.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.19.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.19.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.19.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.19.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.19.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.20.attn_norm.weight = #stream.parameter.named<"model"::"blk.20.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.20.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.20.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.20.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.20.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.20.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.20.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.20.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.20.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.20.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.20.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.20.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.20.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.20.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.20.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.20.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.20.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.20.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.20.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.20.ffn_norm.weight = #stream.parameter.named<"model"::"blk.20.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.20.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.20.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.20.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.20.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.20.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.20.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.20.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.20.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.20.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.20.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.20.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.20.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.21.attn_norm.weight = #stream.parameter.named<"model"::"blk.21.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.21.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.21.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.21.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.21.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.21.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.21.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.21.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.21.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.21.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.21.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.21.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.21.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.21.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.21.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.21.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.21.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.21.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.21.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.21.ffn_norm.weight = #stream.parameter.named<"model"::"blk.21.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.21.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.21.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.21.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.21.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.21.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.21.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.21.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.21.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.21.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.21.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.21.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.21.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.22.attn_norm.weight = #stream.parameter.named<"model"::"blk.22.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.22.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.22.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.22.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.22.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.22.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.22.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.22.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.22.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.22.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.22.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.22.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.22.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.22.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.22.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.22.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.22.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.22.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.22.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.22.ffn_norm.weight = #stream.parameter.named<"model"::"blk.22.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.22.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.22.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.22.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.22.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.22.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.22.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.22.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.22.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.22.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.22.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.22.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.22.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.23.attn_norm.weight = #stream.parameter.named<"model"::"blk.23.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.23.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.23.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.23.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.23.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.23.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.23.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.23.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.23.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.23.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.23.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.23.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.23.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.23.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.23.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.23.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.23.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.23.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.23.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.23.ffn_norm.weight = #stream.parameter.named<"model"::"blk.23.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.23.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.23.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.23.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.23.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.23.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.23.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.23.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.23.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.23.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.23.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.23.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.23.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.24.attn_norm.weight = #stream.parameter.named<"model"::"blk.24.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.24.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.24.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.24.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.24.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.24.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.24.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.24.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.24.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.24.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.24.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.24.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.24.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.24.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.24.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.24.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.24.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.24.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.24.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.24.ffn_norm.weight = #stream.parameter.named<"model"::"blk.24.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.24.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.24.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.24.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.24.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.24.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.24.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.24.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.24.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.24.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.24.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.24.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.24.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.25.attn_norm.weight = #stream.parameter.named<"model"::"blk.25.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.25.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.25.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.25.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.25.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.25.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.25.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.25.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.25.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.25.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.25.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.25.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.25.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.25.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.25.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.25.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.25.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.25.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.25.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.25.ffn_norm.weight = #stream.parameter.named<"model"::"blk.25.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.25.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.25.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.25.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.25.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.25.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.25.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.25.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.25.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.25.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.25.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.25.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.25.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.26.attn_norm.weight = #stream.parameter.named<"model"::"blk.26.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.26.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.26.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.26.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.26.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.26.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.26.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.26.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.26.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.26.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.26.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.26.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.26.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.26.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.26.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.26.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.26.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.26.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.26.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.26.ffn_norm.weight = #stream.parameter.named<"model"::"blk.26.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.26.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.26.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.26.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.26.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.26.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.26.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.26.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.26.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.26.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.26.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.26.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.26.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.27.attn_norm.weight = #stream.parameter.named<"model"::"blk.27.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.27.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.27.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.27.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.27.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.27.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.27.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.27.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.27.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.27.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.27.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.27.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.27.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.27.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.27.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.27.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.27.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.27.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.27.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.27.ffn_norm.weight = #stream.parameter.named<"model"::"blk.27.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.27.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.27.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.27.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.27.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.27.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.27.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.27.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.27.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.27.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.27.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.27.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.27.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.28.attn_norm.weight = #stream.parameter.named<"model"::"blk.28.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.28.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.28.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.28.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.28.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.28.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.28.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.28.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.28.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.28.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.28.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.28.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.28.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.28.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.28.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.28.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.28.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.28.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.28.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.28.ffn_norm.weight = #stream.parameter.named<"model"::"blk.28.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.28.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.28.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.28.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.28.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.28.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.28.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.28.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.28.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.28.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.28.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.28.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.28.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.29.attn_norm.weight = #stream.parameter.named<"model"::"blk.29.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.29.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.29.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.29.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.29.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.29.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.29.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.29.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.29.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.29.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.29.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.29.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.29.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.29.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.29.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.29.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.29.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.29.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.29.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.29.ffn_norm.weight = #stream.parameter.named<"model"::"blk.29.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.29.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.29.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.29.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.29.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.29.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.29.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.29.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.29.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.29.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.29.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.29.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.29.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.30.attn_norm.weight = #stream.parameter.named<"model"::"blk.30.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.30.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.30.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.30.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.30.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.30.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.30.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.30.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.30.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.30.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.30.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.30.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.30.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.30.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.30.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.30.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.30.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.30.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.30.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.30.ffn_norm.weight = #stream.parameter.named<"model"::"blk.30.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.30.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.30.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.30.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.30.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.30.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.30.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.30.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.30.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.30.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.30.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.30.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.30.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.blk.31.attn_norm.weight = #stream.parameter.named<"model"::"blk.31.attn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.31.attn_q.q_input:rscale" = #stream.parameter.named<"model"::"blk.31.attn_q.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.31.attn_q.weight:qs" = #stream.parameter.named<"model"::"blk.31.attn_q.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.31.attn_k.q_input:rscale" = #stream.parameter.named<"model"::"blk.31.attn_k.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.31.attn_k.weight:qs" = #stream.parameter.named<"model"::"blk.31.attn_k.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.31.attn_v.q_input:rscale" = #stream.parameter.named<"model"::"blk.31.attn_v.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.31.attn_v.weight:qs" = #stream.parameter.named<"model"::"blk.31.attn_v.weight:qs"> : tensor<1024x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.31.kv_cache.quantizer:rscale" = #stream.parameter.named<"model"::"blk.31.kv_cache.quantizer:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.31.attn_output.q_input:rscale" = #stream.parameter.named<"model"::"blk.31.attn_output.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.31.attn_output.weight:qs" = #stream.parameter.named<"model"::"blk.31.attn_output.weight:qs"> : tensor<4096x4096xf8E4M3FNUZ> | |
util.global private @__auto.blk.31.ffn_norm.weight = #stream.parameter.named<"model"::"blk.31.ffn_norm.weight"> : tensor<4096xbf16> | |
util.global private @"__auto.blk.31.ffn_gate.q_input:rscale" = #stream.parameter.named<"model"::"blk.31.ffn_gate.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.31.ffn_gate.weight:qs" = #stream.parameter.named<"model"::"blk.31.ffn_gate.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.31.ffn_up.q_input:rscale" = #stream.parameter.named<"model"::"blk.31.ffn_up.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.31.ffn_up.weight:qs" = #stream.parameter.named<"model"::"blk.31.ffn_up.weight:qs"> : tensor<14336x4096xf8E4M3FNUZ> | |
util.global private @"__auto.blk.31.ffn_down.q_input:rscale" = #stream.parameter.named<"model"::"blk.31.ffn_down.q_input:rscale"> : tensor<f32> | |
util.global private @"__auto.blk.31.ffn_down.weight:qs" = #stream.parameter.named<"model"::"blk.31.ffn_down.weight:qs"> : tensor<4096x14336xf8E4M3FNUZ> | |
util.global private @__auto.output_norm.weight = #stream.parameter.named<"model"::"output_norm.weight"> : tensor<4096xbf16> | |
util.global private @__auto.output.weight = #stream.parameter.named<"model"::"output.weight"> : tensor<128256x4096xbf16> | |
func.func @prefill_bs1(%arg0: !torch.vtensor<[1,?],si64>, %arg1: !torch.vtensor<[1],si64>, %arg2: !torch.vtensor<[1,?],si64>, %arg3: !torch.tensor<[?,2097152],f8E4M3FNUZ>) -> !torch.vtensor<[1,?,128256],bf16> attributes {torch.assume_strict_symbolic_shapes} { | |
%__auto.token_embd.weight = util.global.load @__auto.token_embd.weight : tensor<128256x4096xbf16> | |
%0 = torch_c.from_builtin_tensor %__auto.token_embd.weight : tensor<128256x4096xbf16> -> !torch.vtensor<[128256,4096],bf16> | |
%__auto.blk.0.attn_norm.weight = util.global.load @__auto.blk.0.attn_norm.weight : tensor<4096xbf16> | |
%1 = torch_c.from_builtin_tensor %__auto.blk.0.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.0.attn_q.q_input3Arscale = util.global.load @"__auto.blk.0.attn_q.q_input:rscale" : tensor<f32> | |
%2 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.0.attn_q.weight3Aqs = util.global.load @"__auto.blk.0.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%3 = torch_c.from_builtin_tensor %__auto.blk.0.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.0.attn_k.q_input3Arscale = util.global.load @"__auto.blk.0.attn_k.q_input:rscale" : tensor<f32> | |
%4 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.0.attn_k.weight3Aqs = util.global.load @"__auto.blk.0.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%5 = torch_c.from_builtin_tensor %__auto.blk.0.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.0.attn_v.q_input3Arscale = util.global.load @"__auto.blk.0.attn_v.q_input:rscale" : tensor<f32> | |
%6 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.0.attn_v.weight3Aqs = util.global.load @"__auto.blk.0.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%7 = torch_c.from_builtin_tensor %__auto.blk.0.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.0.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.0.kv_cache.quantizer:rscale" : tensor<f32> | |
%8 = torch_c.from_builtin_tensor %__auto.blk.0.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.0.attn_output.q_input3Arscale = util.global.load @"__auto.blk.0.attn_output.q_input:rscale" : tensor<f32> | |
%9 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.0.attn_output.weight3Aqs = util.global.load @"__auto.blk.0.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%10 = torch_c.from_builtin_tensor %__auto.blk.0.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.0.ffn_norm.weight = util.global.load @__auto.blk.0.ffn_norm.weight : tensor<4096xbf16> | |
%11 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.0.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.0.ffn_gate.q_input:rscale" : tensor<f32> | |
%12 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.0.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.0.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%13 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.0.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.0.ffn_up.q_input:rscale" : tensor<f32> | |
%14 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.0.ffn_up.weight3Aqs = util.global.load @"__auto.blk.0.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%15 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.0.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.0.ffn_down.q_input:rscale" : tensor<f32> | |
%16 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.0.ffn_down.weight3Aqs = util.global.load @"__auto.blk.0.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%17 = torch_c.from_builtin_tensor %__auto.blk.0.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.1.attn_norm.weight = util.global.load @__auto.blk.1.attn_norm.weight : tensor<4096xbf16> | |
%18 = torch_c.from_builtin_tensor %__auto.blk.1.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.1.attn_q.q_input3Arscale = util.global.load @"__auto.blk.1.attn_q.q_input:rscale" : tensor<f32> | |
%19 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.1.attn_q.weight3Aqs = util.global.load @"__auto.blk.1.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%20 = torch_c.from_builtin_tensor %__auto.blk.1.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.1.attn_k.q_input3Arscale = util.global.load @"__auto.blk.1.attn_k.q_input:rscale" : tensor<f32> | |
%21 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.1.attn_k.weight3Aqs = util.global.load @"__auto.blk.1.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%22 = torch_c.from_builtin_tensor %__auto.blk.1.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.1.attn_v.q_input3Arscale = util.global.load @"__auto.blk.1.attn_v.q_input:rscale" : tensor<f32> | |
%23 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.1.attn_v.weight3Aqs = util.global.load @"__auto.blk.1.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%24 = torch_c.from_builtin_tensor %__auto.blk.1.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.1.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.1.kv_cache.quantizer:rscale" : tensor<f32> | |
%25 = torch_c.from_builtin_tensor %__auto.blk.1.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.1.attn_output.q_input3Arscale = util.global.load @"__auto.blk.1.attn_output.q_input:rscale" : tensor<f32> | |
%26 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.1.attn_output.weight3Aqs = util.global.load @"__auto.blk.1.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%27 = torch_c.from_builtin_tensor %__auto.blk.1.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.1.ffn_norm.weight = util.global.load @__auto.blk.1.ffn_norm.weight : tensor<4096xbf16> | |
%28 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.1.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.1.ffn_gate.q_input:rscale" : tensor<f32> | |
%29 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.1.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.1.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%30 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.1.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.1.ffn_up.q_input:rscale" : tensor<f32> | |
%31 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.1.ffn_up.weight3Aqs = util.global.load @"__auto.blk.1.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%32 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.1.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.1.ffn_down.q_input:rscale" : tensor<f32> | |
%33 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.1.ffn_down.weight3Aqs = util.global.load @"__auto.blk.1.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%34 = torch_c.from_builtin_tensor %__auto.blk.1.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.2.attn_norm.weight = util.global.load @__auto.blk.2.attn_norm.weight : tensor<4096xbf16> | |
%35 = torch_c.from_builtin_tensor %__auto.blk.2.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.2.attn_q.q_input3Arscale = util.global.load @"__auto.blk.2.attn_q.q_input:rscale" : tensor<f32> | |
%36 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.2.attn_q.weight3Aqs = util.global.load @"__auto.blk.2.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%37 = torch_c.from_builtin_tensor %__auto.blk.2.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.2.attn_k.q_input3Arscale = util.global.load @"__auto.blk.2.attn_k.q_input:rscale" : tensor<f32> | |
%38 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.2.attn_k.weight3Aqs = util.global.load @"__auto.blk.2.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%39 = torch_c.from_builtin_tensor %__auto.blk.2.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.2.attn_v.q_input3Arscale = util.global.load @"__auto.blk.2.attn_v.q_input:rscale" : tensor<f32> | |
%40 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.2.attn_v.weight3Aqs = util.global.load @"__auto.blk.2.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%41 = torch_c.from_builtin_tensor %__auto.blk.2.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.2.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.2.kv_cache.quantizer:rscale" : tensor<f32> | |
%42 = torch_c.from_builtin_tensor %__auto.blk.2.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.2.attn_output.q_input3Arscale = util.global.load @"__auto.blk.2.attn_output.q_input:rscale" : tensor<f32> | |
%43 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.2.attn_output.weight3Aqs = util.global.load @"__auto.blk.2.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%44 = torch_c.from_builtin_tensor %__auto.blk.2.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.2.ffn_norm.weight = util.global.load @__auto.blk.2.ffn_norm.weight : tensor<4096xbf16> | |
%45 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.2.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.2.ffn_gate.q_input:rscale" : tensor<f32> | |
%46 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.2.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.2.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%47 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.2.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.2.ffn_up.q_input:rscale" : tensor<f32> | |
%48 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.2.ffn_up.weight3Aqs = util.global.load @"__auto.blk.2.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%49 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.2.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.2.ffn_down.q_input:rscale" : tensor<f32> | |
%50 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.2.ffn_down.weight3Aqs = util.global.load @"__auto.blk.2.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%51 = torch_c.from_builtin_tensor %__auto.blk.2.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.3.attn_norm.weight = util.global.load @__auto.blk.3.attn_norm.weight : tensor<4096xbf16> | |
%52 = torch_c.from_builtin_tensor %__auto.blk.3.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.3.attn_q.q_input3Arscale = util.global.load @"__auto.blk.3.attn_q.q_input:rscale" : tensor<f32> | |
%53 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.3.attn_q.weight3Aqs = util.global.load @"__auto.blk.3.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%54 = torch_c.from_builtin_tensor %__auto.blk.3.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.3.attn_k.q_input3Arscale = util.global.load @"__auto.blk.3.attn_k.q_input:rscale" : tensor<f32> | |
%55 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.3.attn_k.weight3Aqs = util.global.load @"__auto.blk.3.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%56 = torch_c.from_builtin_tensor %__auto.blk.3.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.3.attn_v.q_input3Arscale = util.global.load @"__auto.blk.3.attn_v.q_input:rscale" : tensor<f32> | |
%57 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.3.attn_v.weight3Aqs = util.global.load @"__auto.blk.3.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%58 = torch_c.from_builtin_tensor %__auto.blk.3.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.3.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.3.kv_cache.quantizer:rscale" : tensor<f32> | |
%59 = torch_c.from_builtin_tensor %__auto.blk.3.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.3.attn_output.q_input3Arscale = util.global.load @"__auto.blk.3.attn_output.q_input:rscale" : tensor<f32> | |
%60 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.3.attn_output.weight3Aqs = util.global.load @"__auto.blk.3.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%61 = torch_c.from_builtin_tensor %__auto.blk.3.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.3.ffn_norm.weight = util.global.load @__auto.blk.3.ffn_norm.weight : tensor<4096xbf16> | |
%62 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.3.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.3.ffn_gate.q_input:rscale" : tensor<f32> | |
%63 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.3.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.3.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%64 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.3.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.3.ffn_up.q_input:rscale" : tensor<f32> | |
%65 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.3.ffn_up.weight3Aqs = util.global.load @"__auto.blk.3.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%66 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.3.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.3.ffn_down.q_input:rscale" : tensor<f32> | |
%67 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.3.ffn_down.weight3Aqs = util.global.load @"__auto.blk.3.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%68 = torch_c.from_builtin_tensor %__auto.blk.3.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.4.attn_norm.weight = util.global.load @__auto.blk.4.attn_norm.weight : tensor<4096xbf16> | |
%69 = torch_c.from_builtin_tensor %__auto.blk.4.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.4.attn_q.q_input3Arscale = util.global.load @"__auto.blk.4.attn_q.q_input:rscale" : tensor<f32> | |
%70 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.4.attn_q.weight3Aqs = util.global.load @"__auto.blk.4.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%71 = torch_c.from_builtin_tensor %__auto.blk.4.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.4.attn_k.q_input3Arscale = util.global.load @"__auto.blk.4.attn_k.q_input:rscale" : tensor<f32> | |
%72 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.4.attn_k.weight3Aqs = util.global.load @"__auto.blk.4.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%73 = torch_c.from_builtin_tensor %__auto.blk.4.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.4.attn_v.q_input3Arscale = util.global.load @"__auto.blk.4.attn_v.q_input:rscale" : tensor<f32> | |
%74 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.4.attn_v.weight3Aqs = util.global.load @"__auto.blk.4.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%75 = torch_c.from_builtin_tensor %__auto.blk.4.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.4.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.4.kv_cache.quantizer:rscale" : tensor<f32> | |
%76 = torch_c.from_builtin_tensor %__auto.blk.4.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.4.attn_output.q_input3Arscale = util.global.load @"__auto.blk.4.attn_output.q_input:rscale" : tensor<f32> | |
%77 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.4.attn_output.weight3Aqs = util.global.load @"__auto.blk.4.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%78 = torch_c.from_builtin_tensor %__auto.blk.4.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.4.ffn_norm.weight = util.global.load @__auto.blk.4.ffn_norm.weight : tensor<4096xbf16> | |
%79 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.4.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.4.ffn_gate.q_input:rscale" : tensor<f32> | |
%80 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.4.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.4.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%81 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.4.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.4.ffn_up.q_input:rscale" : tensor<f32> | |
%82 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.4.ffn_up.weight3Aqs = util.global.load @"__auto.blk.4.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%83 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.4.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.4.ffn_down.q_input:rscale" : tensor<f32> | |
%84 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.4.ffn_down.weight3Aqs = util.global.load @"__auto.blk.4.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%85 = torch_c.from_builtin_tensor %__auto.blk.4.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.5.attn_norm.weight = util.global.load @__auto.blk.5.attn_norm.weight : tensor<4096xbf16> | |
%86 = torch_c.from_builtin_tensor %__auto.blk.5.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.5.attn_q.q_input3Arscale = util.global.load @"__auto.blk.5.attn_q.q_input:rscale" : tensor<f32> | |
%87 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.5.attn_q.weight3Aqs = util.global.load @"__auto.blk.5.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%88 = torch_c.from_builtin_tensor %__auto.blk.5.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.5.attn_k.q_input3Arscale = util.global.load @"__auto.blk.5.attn_k.q_input:rscale" : tensor<f32> | |
%89 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.5.attn_k.weight3Aqs = util.global.load @"__auto.blk.5.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%90 = torch_c.from_builtin_tensor %__auto.blk.5.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.5.attn_v.q_input3Arscale = util.global.load @"__auto.blk.5.attn_v.q_input:rscale" : tensor<f32> | |
%91 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.5.attn_v.weight3Aqs = util.global.load @"__auto.blk.5.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%92 = torch_c.from_builtin_tensor %__auto.blk.5.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.5.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.5.kv_cache.quantizer:rscale" : tensor<f32> | |
%93 = torch_c.from_builtin_tensor %__auto.blk.5.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.5.attn_output.q_input3Arscale = util.global.load @"__auto.blk.5.attn_output.q_input:rscale" : tensor<f32> | |
%94 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.5.attn_output.weight3Aqs = util.global.load @"__auto.blk.5.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%95 = torch_c.from_builtin_tensor %__auto.blk.5.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.5.ffn_norm.weight = util.global.load @__auto.blk.5.ffn_norm.weight : tensor<4096xbf16> | |
%96 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.5.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.5.ffn_gate.q_input:rscale" : tensor<f32> | |
%97 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.5.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.5.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%98 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.5.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.5.ffn_up.q_input:rscale" : tensor<f32> | |
%99 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.5.ffn_up.weight3Aqs = util.global.load @"__auto.blk.5.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%100 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.5.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.5.ffn_down.q_input:rscale" : tensor<f32> | |
%101 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.5.ffn_down.weight3Aqs = util.global.load @"__auto.blk.5.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%102 = torch_c.from_builtin_tensor %__auto.blk.5.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.6.attn_norm.weight = util.global.load @__auto.blk.6.attn_norm.weight : tensor<4096xbf16> | |
%103 = torch_c.from_builtin_tensor %__auto.blk.6.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.6.attn_q.q_input3Arscale = util.global.load @"__auto.blk.6.attn_q.q_input:rscale" : tensor<f32> | |
%104 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.6.attn_q.weight3Aqs = util.global.load @"__auto.blk.6.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%105 = torch_c.from_builtin_tensor %__auto.blk.6.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.6.attn_k.q_input3Arscale = util.global.load @"__auto.blk.6.attn_k.q_input:rscale" : tensor<f32> | |
%106 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.6.attn_k.weight3Aqs = util.global.load @"__auto.blk.6.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%107 = torch_c.from_builtin_tensor %__auto.blk.6.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.6.attn_v.q_input3Arscale = util.global.load @"__auto.blk.6.attn_v.q_input:rscale" : tensor<f32> | |
%108 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.6.attn_v.weight3Aqs = util.global.load @"__auto.blk.6.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%109 = torch_c.from_builtin_tensor %__auto.blk.6.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.6.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.6.kv_cache.quantizer:rscale" : tensor<f32> | |
%110 = torch_c.from_builtin_tensor %__auto.blk.6.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.6.attn_output.q_input3Arscale = util.global.load @"__auto.blk.6.attn_output.q_input:rscale" : tensor<f32> | |
%111 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.6.attn_output.weight3Aqs = util.global.load @"__auto.blk.6.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%112 = torch_c.from_builtin_tensor %__auto.blk.6.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.6.ffn_norm.weight = util.global.load @__auto.blk.6.ffn_norm.weight : tensor<4096xbf16> | |
%113 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.6.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.6.ffn_gate.q_input:rscale" : tensor<f32> | |
%114 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.6.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.6.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%115 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.6.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.6.ffn_up.q_input:rscale" : tensor<f32> | |
%116 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.6.ffn_up.weight3Aqs = util.global.load @"__auto.blk.6.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%117 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.6.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.6.ffn_down.q_input:rscale" : tensor<f32> | |
%118 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.6.ffn_down.weight3Aqs = util.global.load @"__auto.blk.6.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%119 = torch_c.from_builtin_tensor %__auto.blk.6.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.7.attn_norm.weight = util.global.load @__auto.blk.7.attn_norm.weight : tensor<4096xbf16> | |
%120 = torch_c.from_builtin_tensor %__auto.blk.7.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.7.attn_q.q_input3Arscale = util.global.load @"__auto.blk.7.attn_q.q_input:rscale" : tensor<f32> | |
%121 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.7.attn_q.weight3Aqs = util.global.load @"__auto.blk.7.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%122 = torch_c.from_builtin_tensor %__auto.blk.7.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.7.attn_k.q_input3Arscale = util.global.load @"__auto.blk.7.attn_k.q_input:rscale" : tensor<f32> | |
%123 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.7.attn_k.weight3Aqs = util.global.load @"__auto.blk.7.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%124 = torch_c.from_builtin_tensor %__auto.blk.7.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.7.attn_v.q_input3Arscale = util.global.load @"__auto.blk.7.attn_v.q_input:rscale" : tensor<f32> | |
%125 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.7.attn_v.weight3Aqs = util.global.load @"__auto.blk.7.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%126 = torch_c.from_builtin_tensor %__auto.blk.7.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.7.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.7.kv_cache.quantizer:rscale" : tensor<f32> | |
%127 = torch_c.from_builtin_tensor %__auto.blk.7.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.7.attn_output.q_input3Arscale = util.global.load @"__auto.blk.7.attn_output.q_input:rscale" : tensor<f32> | |
%128 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.7.attn_output.weight3Aqs = util.global.load @"__auto.blk.7.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%129 = torch_c.from_builtin_tensor %__auto.blk.7.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.7.ffn_norm.weight = util.global.load @__auto.blk.7.ffn_norm.weight : tensor<4096xbf16> | |
%130 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.7.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.7.ffn_gate.q_input:rscale" : tensor<f32> | |
%131 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.7.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.7.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%132 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.7.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.7.ffn_up.q_input:rscale" : tensor<f32> | |
%133 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.7.ffn_up.weight3Aqs = util.global.load @"__auto.blk.7.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%134 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.7.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.7.ffn_down.q_input:rscale" : tensor<f32> | |
%135 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.7.ffn_down.weight3Aqs = util.global.load @"__auto.blk.7.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%136 = torch_c.from_builtin_tensor %__auto.blk.7.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.8.attn_norm.weight = util.global.load @__auto.blk.8.attn_norm.weight : tensor<4096xbf16> | |
%137 = torch_c.from_builtin_tensor %__auto.blk.8.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.8.attn_q.q_input3Arscale = util.global.load @"__auto.blk.8.attn_q.q_input:rscale" : tensor<f32> | |
%138 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.8.attn_q.weight3Aqs = util.global.load @"__auto.blk.8.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%139 = torch_c.from_builtin_tensor %__auto.blk.8.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.8.attn_k.q_input3Arscale = util.global.load @"__auto.blk.8.attn_k.q_input:rscale" : tensor<f32> | |
%140 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.8.attn_k.weight3Aqs = util.global.load @"__auto.blk.8.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%141 = torch_c.from_builtin_tensor %__auto.blk.8.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.8.attn_v.q_input3Arscale = util.global.load @"__auto.blk.8.attn_v.q_input:rscale" : tensor<f32> | |
%142 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.8.attn_v.weight3Aqs = util.global.load @"__auto.blk.8.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%143 = torch_c.from_builtin_tensor %__auto.blk.8.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.8.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.8.kv_cache.quantizer:rscale" : tensor<f32> | |
%144 = torch_c.from_builtin_tensor %__auto.blk.8.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.8.attn_output.q_input3Arscale = util.global.load @"__auto.blk.8.attn_output.q_input:rscale" : tensor<f32> | |
%145 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.8.attn_output.weight3Aqs = util.global.load @"__auto.blk.8.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%146 = torch_c.from_builtin_tensor %__auto.blk.8.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.8.ffn_norm.weight = util.global.load @__auto.blk.8.ffn_norm.weight : tensor<4096xbf16> | |
%147 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.8.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.8.ffn_gate.q_input:rscale" : tensor<f32> | |
%148 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.8.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.8.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%149 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.8.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.8.ffn_up.q_input:rscale" : tensor<f32> | |
%150 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.8.ffn_up.weight3Aqs = util.global.load @"__auto.blk.8.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%151 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.8.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.8.ffn_down.q_input:rscale" : tensor<f32> | |
%152 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.8.ffn_down.weight3Aqs = util.global.load @"__auto.blk.8.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%153 = torch_c.from_builtin_tensor %__auto.blk.8.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.9.attn_norm.weight = util.global.load @__auto.blk.9.attn_norm.weight : tensor<4096xbf16> | |
%154 = torch_c.from_builtin_tensor %__auto.blk.9.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.9.attn_q.q_input3Arscale = util.global.load @"__auto.blk.9.attn_q.q_input:rscale" : tensor<f32> | |
%155 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.9.attn_q.weight3Aqs = util.global.load @"__auto.blk.9.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%156 = torch_c.from_builtin_tensor %__auto.blk.9.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.9.attn_k.q_input3Arscale = util.global.load @"__auto.blk.9.attn_k.q_input:rscale" : tensor<f32> | |
%157 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.9.attn_k.weight3Aqs = util.global.load @"__auto.blk.9.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%158 = torch_c.from_builtin_tensor %__auto.blk.9.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.9.attn_v.q_input3Arscale = util.global.load @"__auto.blk.9.attn_v.q_input:rscale" : tensor<f32> | |
%159 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.9.attn_v.weight3Aqs = util.global.load @"__auto.blk.9.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%160 = torch_c.from_builtin_tensor %__auto.blk.9.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.9.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.9.kv_cache.quantizer:rscale" : tensor<f32> | |
%161 = torch_c.from_builtin_tensor %__auto.blk.9.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.9.attn_output.q_input3Arscale = util.global.load @"__auto.blk.9.attn_output.q_input:rscale" : tensor<f32> | |
%162 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.9.attn_output.weight3Aqs = util.global.load @"__auto.blk.9.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%163 = torch_c.from_builtin_tensor %__auto.blk.9.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.9.ffn_norm.weight = util.global.load @__auto.blk.9.ffn_norm.weight : tensor<4096xbf16> | |
%164 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.9.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.9.ffn_gate.q_input:rscale" : tensor<f32> | |
%165 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.9.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.9.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%166 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.9.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.9.ffn_up.q_input:rscale" : tensor<f32> | |
%167 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.9.ffn_up.weight3Aqs = util.global.load @"__auto.blk.9.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%168 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.9.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.9.ffn_down.q_input:rscale" : tensor<f32> | |
%169 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.9.ffn_down.weight3Aqs = util.global.load @"__auto.blk.9.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%170 = torch_c.from_builtin_tensor %__auto.blk.9.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.10.attn_norm.weight = util.global.load @__auto.blk.10.attn_norm.weight : tensor<4096xbf16> | |
%171 = torch_c.from_builtin_tensor %__auto.blk.10.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.10.attn_q.q_input3Arscale = util.global.load @"__auto.blk.10.attn_q.q_input:rscale" : tensor<f32> | |
%172 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.10.attn_q.weight3Aqs = util.global.load @"__auto.blk.10.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%173 = torch_c.from_builtin_tensor %__auto.blk.10.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.10.attn_k.q_input3Arscale = util.global.load @"__auto.blk.10.attn_k.q_input:rscale" : tensor<f32> | |
%174 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.10.attn_k.weight3Aqs = util.global.load @"__auto.blk.10.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%175 = torch_c.from_builtin_tensor %__auto.blk.10.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.10.attn_v.q_input3Arscale = util.global.load @"__auto.blk.10.attn_v.q_input:rscale" : tensor<f32> | |
%176 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.10.attn_v.weight3Aqs = util.global.load @"__auto.blk.10.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%177 = torch_c.from_builtin_tensor %__auto.blk.10.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.10.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.10.kv_cache.quantizer:rscale" : tensor<f32> | |
%178 = torch_c.from_builtin_tensor %__auto.blk.10.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.10.attn_output.q_input3Arscale = util.global.load @"__auto.blk.10.attn_output.q_input:rscale" : tensor<f32> | |
%179 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.10.attn_output.weight3Aqs = util.global.load @"__auto.blk.10.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%180 = torch_c.from_builtin_tensor %__auto.blk.10.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.10.ffn_norm.weight = util.global.load @__auto.blk.10.ffn_norm.weight : tensor<4096xbf16> | |
%181 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.10.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.10.ffn_gate.q_input:rscale" : tensor<f32> | |
%182 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.10.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.10.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%183 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.10.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.10.ffn_up.q_input:rscale" : tensor<f32> | |
%184 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.10.ffn_up.weight3Aqs = util.global.load @"__auto.blk.10.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%185 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.10.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.10.ffn_down.q_input:rscale" : tensor<f32> | |
%186 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.10.ffn_down.weight3Aqs = util.global.load @"__auto.blk.10.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%187 = torch_c.from_builtin_tensor %__auto.blk.10.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.11.attn_norm.weight = util.global.load @__auto.blk.11.attn_norm.weight : tensor<4096xbf16> | |
%188 = torch_c.from_builtin_tensor %__auto.blk.11.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.11.attn_q.q_input3Arscale = util.global.load @"__auto.blk.11.attn_q.q_input:rscale" : tensor<f32> | |
%189 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.11.attn_q.weight3Aqs = util.global.load @"__auto.blk.11.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%190 = torch_c.from_builtin_tensor %__auto.blk.11.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.11.attn_k.q_input3Arscale = util.global.load @"__auto.blk.11.attn_k.q_input:rscale" : tensor<f32> | |
%191 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.11.attn_k.weight3Aqs = util.global.load @"__auto.blk.11.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%192 = torch_c.from_builtin_tensor %__auto.blk.11.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.11.attn_v.q_input3Arscale = util.global.load @"__auto.blk.11.attn_v.q_input:rscale" : tensor<f32> | |
%193 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.11.attn_v.weight3Aqs = util.global.load @"__auto.blk.11.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%194 = torch_c.from_builtin_tensor %__auto.blk.11.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.11.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.11.kv_cache.quantizer:rscale" : tensor<f32> | |
%195 = torch_c.from_builtin_tensor %__auto.blk.11.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.11.attn_output.q_input3Arscale = util.global.load @"__auto.blk.11.attn_output.q_input:rscale" : tensor<f32> | |
%196 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.11.attn_output.weight3Aqs = util.global.load @"__auto.blk.11.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%197 = torch_c.from_builtin_tensor %__auto.blk.11.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.11.ffn_norm.weight = util.global.load @__auto.blk.11.ffn_norm.weight : tensor<4096xbf16> | |
%198 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.11.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.11.ffn_gate.q_input:rscale" : tensor<f32> | |
%199 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.11.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.11.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%200 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.11.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.11.ffn_up.q_input:rscale" : tensor<f32> | |
%201 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.11.ffn_up.weight3Aqs = util.global.load @"__auto.blk.11.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%202 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.11.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.11.ffn_down.q_input:rscale" : tensor<f32> | |
%203 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.11.ffn_down.weight3Aqs = util.global.load @"__auto.blk.11.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%204 = torch_c.from_builtin_tensor %__auto.blk.11.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.12.attn_norm.weight = util.global.load @__auto.blk.12.attn_norm.weight : tensor<4096xbf16> | |
%205 = torch_c.from_builtin_tensor %__auto.blk.12.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.12.attn_q.q_input3Arscale = util.global.load @"__auto.blk.12.attn_q.q_input:rscale" : tensor<f32> | |
%206 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.12.attn_q.weight3Aqs = util.global.load @"__auto.blk.12.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%207 = torch_c.from_builtin_tensor %__auto.blk.12.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.12.attn_k.q_input3Arscale = util.global.load @"__auto.blk.12.attn_k.q_input:rscale" : tensor<f32> | |
%208 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.12.attn_k.weight3Aqs = util.global.load @"__auto.blk.12.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%209 = torch_c.from_builtin_tensor %__auto.blk.12.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.12.attn_v.q_input3Arscale = util.global.load @"__auto.blk.12.attn_v.q_input:rscale" : tensor<f32> | |
%210 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.12.attn_v.weight3Aqs = util.global.load @"__auto.blk.12.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%211 = torch_c.from_builtin_tensor %__auto.blk.12.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.12.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.12.kv_cache.quantizer:rscale" : tensor<f32> | |
%212 = torch_c.from_builtin_tensor %__auto.blk.12.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.12.attn_output.q_input3Arscale = util.global.load @"__auto.blk.12.attn_output.q_input:rscale" : tensor<f32> | |
%213 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.12.attn_output.weight3Aqs = util.global.load @"__auto.blk.12.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%214 = torch_c.from_builtin_tensor %__auto.blk.12.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.12.ffn_norm.weight = util.global.load @__auto.blk.12.ffn_norm.weight : tensor<4096xbf16> | |
%215 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.12.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.12.ffn_gate.q_input:rscale" : tensor<f32> | |
%216 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.12.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.12.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%217 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.12.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.12.ffn_up.q_input:rscale" : tensor<f32> | |
%218 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.12.ffn_up.weight3Aqs = util.global.load @"__auto.blk.12.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%219 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.12.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.12.ffn_down.q_input:rscale" : tensor<f32> | |
%220 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.12.ffn_down.weight3Aqs = util.global.load @"__auto.blk.12.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%221 = torch_c.from_builtin_tensor %__auto.blk.12.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.13.attn_norm.weight = util.global.load @__auto.blk.13.attn_norm.weight : tensor<4096xbf16> | |
%222 = torch_c.from_builtin_tensor %__auto.blk.13.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.13.attn_q.q_input3Arscale = util.global.load @"__auto.blk.13.attn_q.q_input:rscale" : tensor<f32> | |
%223 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.13.attn_q.weight3Aqs = util.global.load @"__auto.blk.13.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%224 = torch_c.from_builtin_tensor %__auto.blk.13.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.13.attn_k.q_input3Arscale = util.global.load @"__auto.blk.13.attn_k.q_input:rscale" : tensor<f32> | |
%225 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.13.attn_k.weight3Aqs = util.global.load @"__auto.blk.13.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%226 = torch_c.from_builtin_tensor %__auto.blk.13.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.13.attn_v.q_input3Arscale = util.global.load @"__auto.blk.13.attn_v.q_input:rscale" : tensor<f32> | |
%227 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.13.attn_v.weight3Aqs = util.global.load @"__auto.blk.13.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%228 = torch_c.from_builtin_tensor %__auto.blk.13.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.13.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.13.kv_cache.quantizer:rscale" : tensor<f32> | |
%229 = torch_c.from_builtin_tensor %__auto.blk.13.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.13.attn_output.q_input3Arscale = util.global.load @"__auto.blk.13.attn_output.q_input:rscale" : tensor<f32> | |
%230 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.13.attn_output.weight3Aqs = util.global.load @"__auto.blk.13.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%231 = torch_c.from_builtin_tensor %__auto.blk.13.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.13.ffn_norm.weight = util.global.load @__auto.blk.13.ffn_norm.weight : tensor<4096xbf16> | |
%232 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.13.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.13.ffn_gate.q_input:rscale" : tensor<f32> | |
%233 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.13.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.13.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%234 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.13.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.13.ffn_up.q_input:rscale" : tensor<f32> | |
%235 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.13.ffn_up.weight3Aqs = util.global.load @"__auto.blk.13.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%236 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.13.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.13.ffn_down.q_input:rscale" : tensor<f32> | |
%237 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.13.ffn_down.weight3Aqs = util.global.load @"__auto.blk.13.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%238 = torch_c.from_builtin_tensor %__auto.blk.13.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.14.attn_norm.weight = util.global.load @__auto.blk.14.attn_norm.weight : tensor<4096xbf16> | |
%239 = torch_c.from_builtin_tensor %__auto.blk.14.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.14.attn_q.q_input3Arscale = util.global.load @"__auto.blk.14.attn_q.q_input:rscale" : tensor<f32> | |
%240 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.14.attn_q.weight3Aqs = util.global.load @"__auto.blk.14.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%241 = torch_c.from_builtin_tensor %__auto.blk.14.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.14.attn_k.q_input3Arscale = util.global.load @"__auto.blk.14.attn_k.q_input:rscale" : tensor<f32> | |
%242 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.14.attn_k.weight3Aqs = util.global.load @"__auto.blk.14.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%243 = torch_c.from_builtin_tensor %__auto.blk.14.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.14.attn_v.q_input3Arscale = util.global.load @"__auto.blk.14.attn_v.q_input:rscale" : tensor<f32> | |
%244 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.14.attn_v.weight3Aqs = util.global.load @"__auto.blk.14.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%245 = torch_c.from_builtin_tensor %__auto.blk.14.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.14.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.14.kv_cache.quantizer:rscale" : tensor<f32> | |
%246 = torch_c.from_builtin_tensor %__auto.blk.14.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.14.attn_output.q_input3Arscale = util.global.load @"__auto.blk.14.attn_output.q_input:rscale" : tensor<f32> | |
%247 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.14.attn_output.weight3Aqs = util.global.load @"__auto.blk.14.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%248 = torch_c.from_builtin_tensor %__auto.blk.14.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.14.ffn_norm.weight = util.global.load @__auto.blk.14.ffn_norm.weight : tensor<4096xbf16> | |
%249 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.14.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.14.ffn_gate.q_input:rscale" : tensor<f32> | |
%250 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.14.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.14.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%251 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.14.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.14.ffn_up.q_input:rscale" : tensor<f32> | |
%252 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.14.ffn_up.weight3Aqs = util.global.load @"__auto.blk.14.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%253 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.14.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.14.ffn_down.q_input:rscale" : tensor<f32> | |
%254 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.14.ffn_down.weight3Aqs = util.global.load @"__auto.blk.14.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%255 = torch_c.from_builtin_tensor %__auto.blk.14.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.15.attn_norm.weight = util.global.load @__auto.blk.15.attn_norm.weight : tensor<4096xbf16> | |
%256 = torch_c.from_builtin_tensor %__auto.blk.15.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.15.attn_q.q_input3Arscale = util.global.load @"__auto.blk.15.attn_q.q_input:rscale" : tensor<f32> | |
%257 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.15.attn_q.weight3Aqs = util.global.load @"__auto.blk.15.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%258 = torch_c.from_builtin_tensor %__auto.blk.15.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.15.attn_k.q_input3Arscale = util.global.load @"__auto.blk.15.attn_k.q_input:rscale" : tensor<f32> | |
%259 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.15.attn_k.weight3Aqs = util.global.load @"__auto.blk.15.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%260 = torch_c.from_builtin_tensor %__auto.blk.15.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.15.attn_v.q_input3Arscale = util.global.load @"__auto.blk.15.attn_v.q_input:rscale" : tensor<f32> | |
%261 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.15.attn_v.weight3Aqs = util.global.load @"__auto.blk.15.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%262 = torch_c.from_builtin_tensor %__auto.blk.15.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.15.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.15.kv_cache.quantizer:rscale" : tensor<f32> | |
%263 = torch_c.from_builtin_tensor %__auto.blk.15.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.15.attn_output.q_input3Arscale = util.global.load @"__auto.blk.15.attn_output.q_input:rscale" : tensor<f32> | |
%264 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.15.attn_output.weight3Aqs = util.global.load @"__auto.blk.15.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%265 = torch_c.from_builtin_tensor %__auto.blk.15.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.15.ffn_norm.weight = util.global.load @__auto.blk.15.ffn_norm.weight : tensor<4096xbf16> | |
%266 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.15.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.15.ffn_gate.q_input:rscale" : tensor<f32> | |
%267 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.15.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.15.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%268 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.15.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.15.ffn_up.q_input:rscale" : tensor<f32> | |
%269 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.15.ffn_up.weight3Aqs = util.global.load @"__auto.blk.15.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%270 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.15.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.15.ffn_down.q_input:rscale" : tensor<f32> | |
%271 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.15.ffn_down.weight3Aqs = util.global.load @"__auto.blk.15.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%272 = torch_c.from_builtin_tensor %__auto.blk.15.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.16.attn_norm.weight = util.global.load @__auto.blk.16.attn_norm.weight : tensor<4096xbf16> | |
%273 = torch_c.from_builtin_tensor %__auto.blk.16.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.16.attn_q.q_input3Arscale = util.global.load @"__auto.blk.16.attn_q.q_input:rscale" : tensor<f32> | |
%274 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.16.attn_q.weight3Aqs = util.global.load @"__auto.blk.16.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%275 = torch_c.from_builtin_tensor %__auto.blk.16.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.16.attn_k.q_input3Arscale = util.global.load @"__auto.blk.16.attn_k.q_input:rscale" : tensor<f32> | |
%276 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.16.attn_k.weight3Aqs = util.global.load @"__auto.blk.16.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%277 = torch_c.from_builtin_tensor %__auto.blk.16.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.16.attn_v.q_input3Arscale = util.global.load @"__auto.blk.16.attn_v.q_input:rscale" : tensor<f32> | |
%278 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.16.attn_v.weight3Aqs = util.global.load @"__auto.blk.16.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%279 = torch_c.from_builtin_tensor %__auto.blk.16.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.16.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.16.kv_cache.quantizer:rscale" : tensor<f32> | |
%280 = torch_c.from_builtin_tensor %__auto.blk.16.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.16.attn_output.q_input3Arscale = util.global.load @"__auto.blk.16.attn_output.q_input:rscale" : tensor<f32> | |
%281 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.16.attn_output.weight3Aqs = util.global.load @"__auto.blk.16.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%282 = torch_c.from_builtin_tensor %__auto.blk.16.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.16.ffn_norm.weight = util.global.load @__auto.blk.16.ffn_norm.weight : tensor<4096xbf16> | |
%283 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.16.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.16.ffn_gate.q_input:rscale" : tensor<f32> | |
%284 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.16.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.16.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%285 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.16.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.16.ffn_up.q_input:rscale" : tensor<f32> | |
%286 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.16.ffn_up.weight3Aqs = util.global.load @"__auto.blk.16.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%287 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.16.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.16.ffn_down.q_input:rscale" : tensor<f32> | |
%288 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.16.ffn_down.weight3Aqs = util.global.load @"__auto.blk.16.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%289 = torch_c.from_builtin_tensor %__auto.blk.16.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.17.attn_norm.weight = util.global.load @__auto.blk.17.attn_norm.weight : tensor<4096xbf16> | |
%290 = torch_c.from_builtin_tensor %__auto.blk.17.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.17.attn_q.q_input3Arscale = util.global.load @"__auto.blk.17.attn_q.q_input:rscale" : tensor<f32> | |
%291 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.17.attn_q.weight3Aqs = util.global.load @"__auto.blk.17.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%292 = torch_c.from_builtin_tensor %__auto.blk.17.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.17.attn_k.q_input3Arscale = util.global.load @"__auto.blk.17.attn_k.q_input:rscale" : tensor<f32> | |
%293 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.17.attn_k.weight3Aqs = util.global.load @"__auto.blk.17.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%294 = torch_c.from_builtin_tensor %__auto.blk.17.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.17.attn_v.q_input3Arscale = util.global.load @"__auto.blk.17.attn_v.q_input:rscale" : tensor<f32> | |
%295 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.17.attn_v.weight3Aqs = util.global.load @"__auto.blk.17.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%296 = torch_c.from_builtin_tensor %__auto.blk.17.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.17.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.17.kv_cache.quantizer:rscale" : tensor<f32> | |
%297 = torch_c.from_builtin_tensor %__auto.blk.17.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.17.attn_output.q_input3Arscale = util.global.load @"__auto.blk.17.attn_output.q_input:rscale" : tensor<f32> | |
%298 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.17.attn_output.weight3Aqs = util.global.load @"__auto.blk.17.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%299 = torch_c.from_builtin_tensor %__auto.blk.17.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.17.ffn_norm.weight = util.global.load @__auto.blk.17.ffn_norm.weight : tensor<4096xbf16> | |
%300 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.17.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.17.ffn_gate.q_input:rscale" : tensor<f32> | |
%301 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.17.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.17.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%302 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.17.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.17.ffn_up.q_input:rscale" : tensor<f32> | |
%303 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.17.ffn_up.weight3Aqs = util.global.load @"__auto.blk.17.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%304 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.17.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.17.ffn_down.q_input:rscale" : tensor<f32> | |
%305 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.17.ffn_down.weight3Aqs = util.global.load @"__auto.blk.17.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%306 = torch_c.from_builtin_tensor %__auto.blk.17.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.18.attn_norm.weight = util.global.load @__auto.blk.18.attn_norm.weight : tensor<4096xbf16> | |
%307 = torch_c.from_builtin_tensor %__auto.blk.18.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.18.attn_q.q_input3Arscale = util.global.load @"__auto.blk.18.attn_q.q_input:rscale" : tensor<f32> | |
%308 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.18.attn_q.weight3Aqs = util.global.load @"__auto.blk.18.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%309 = torch_c.from_builtin_tensor %__auto.blk.18.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.18.attn_k.q_input3Arscale = util.global.load @"__auto.blk.18.attn_k.q_input:rscale" : tensor<f32> | |
%310 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.18.attn_k.weight3Aqs = util.global.load @"__auto.blk.18.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%311 = torch_c.from_builtin_tensor %__auto.blk.18.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.18.attn_v.q_input3Arscale = util.global.load @"__auto.blk.18.attn_v.q_input:rscale" : tensor<f32> | |
%312 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.18.attn_v.weight3Aqs = util.global.load @"__auto.blk.18.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%313 = torch_c.from_builtin_tensor %__auto.blk.18.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.18.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.18.kv_cache.quantizer:rscale" : tensor<f32> | |
%314 = torch_c.from_builtin_tensor %__auto.blk.18.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.18.attn_output.q_input3Arscale = util.global.load @"__auto.blk.18.attn_output.q_input:rscale" : tensor<f32> | |
%315 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.18.attn_output.weight3Aqs = util.global.load @"__auto.blk.18.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%316 = torch_c.from_builtin_tensor %__auto.blk.18.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.18.ffn_norm.weight = util.global.load @__auto.blk.18.ffn_norm.weight : tensor<4096xbf16> | |
%317 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.18.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.18.ffn_gate.q_input:rscale" : tensor<f32> | |
%318 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.18.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.18.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%319 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.18.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.18.ffn_up.q_input:rscale" : tensor<f32> | |
%320 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.18.ffn_up.weight3Aqs = util.global.load @"__auto.blk.18.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%321 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.18.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.18.ffn_down.q_input:rscale" : tensor<f32> | |
%322 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.18.ffn_down.weight3Aqs = util.global.load @"__auto.blk.18.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%323 = torch_c.from_builtin_tensor %__auto.blk.18.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.19.attn_norm.weight = util.global.load @__auto.blk.19.attn_norm.weight : tensor<4096xbf16> | |
%324 = torch_c.from_builtin_tensor %__auto.blk.19.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.19.attn_q.q_input3Arscale = util.global.load @"__auto.blk.19.attn_q.q_input:rscale" : tensor<f32> | |
%325 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.19.attn_q.weight3Aqs = util.global.load @"__auto.blk.19.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%326 = torch_c.from_builtin_tensor %__auto.blk.19.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.19.attn_k.q_input3Arscale = util.global.load @"__auto.blk.19.attn_k.q_input:rscale" : tensor<f32> | |
%327 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.19.attn_k.weight3Aqs = util.global.load @"__auto.blk.19.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%328 = torch_c.from_builtin_tensor %__auto.blk.19.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.19.attn_v.q_input3Arscale = util.global.load @"__auto.blk.19.attn_v.q_input:rscale" : tensor<f32> | |
%329 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.19.attn_v.weight3Aqs = util.global.load @"__auto.blk.19.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%330 = torch_c.from_builtin_tensor %__auto.blk.19.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.19.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.19.kv_cache.quantizer:rscale" : tensor<f32> | |
%331 = torch_c.from_builtin_tensor %__auto.blk.19.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.19.attn_output.q_input3Arscale = util.global.load @"__auto.blk.19.attn_output.q_input:rscale" : tensor<f32> | |
%332 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.19.attn_output.weight3Aqs = util.global.load @"__auto.blk.19.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%333 = torch_c.from_builtin_tensor %__auto.blk.19.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.19.ffn_norm.weight = util.global.load @__auto.blk.19.ffn_norm.weight : tensor<4096xbf16> | |
%334 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.19.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.19.ffn_gate.q_input:rscale" : tensor<f32> | |
%335 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.19.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.19.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%336 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.19.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.19.ffn_up.q_input:rscale" : tensor<f32> | |
%337 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.19.ffn_up.weight3Aqs = util.global.load @"__auto.blk.19.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%338 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.19.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.19.ffn_down.q_input:rscale" : tensor<f32> | |
%339 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.19.ffn_down.weight3Aqs = util.global.load @"__auto.blk.19.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%340 = torch_c.from_builtin_tensor %__auto.blk.19.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.20.attn_norm.weight = util.global.load @__auto.blk.20.attn_norm.weight : tensor<4096xbf16> | |
%341 = torch_c.from_builtin_tensor %__auto.blk.20.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.20.attn_q.q_input3Arscale = util.global.load @"__auto.blk.20.attn_q.q_input:rscale" : tensor<f32> | |
%342 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.20.attn_q.weight3Aqs = util.global.load @"__auto.blk.20.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%343 = torch_c.from_builtin_tensor %__auto.blk.20.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.20.attn_k.q_input3Arscale = util.global.load @"__auto.blk.20.attn_k.q_input:rscale" : tensor<f32> | |
%344 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.20.attn_k.weight3Aqs = util.global.load @"__auto.blk.20.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%345 = torch_c.from_builtin_tensor %__auto.blk.20.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.20.attn_v.q_input3Arscale = util.global.load @"__auto.blk.20.attn_v.q_input:rscale" : tensor<f32> | |
%346 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.20.attn_v.weight3Aqs = util.global.load @"__auto.blk.20.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%347 = torch_c.from_builtin_tensor %__auto.blk.20.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.20.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.20.kv_cache.quantizer:rscale" : tensor<f32> | |
%348 = torch_c.from_builtin_tensor %__auto.blk.20.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.20.attn_output.q_input3Arscale = util.global.load @"__auto.blk.20.attn_output.q_input:rscale" : tensor<f32> | |
%349 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.20.attn_output.weight3Aqs = util.global.load @"__auto.blk.20.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%350 = torch_c.from_builtin_tensor %__auto.blk.20.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.20.ffn_norm.weight = util.global.load @__auto.blk.20.ffn_norm.weight : tensor<4096xbf16> | |
%351 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.20.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.20.ffn_gate.q_input:rscale" : tensor<f32> | |
%352 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.20.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.20.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%353 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.20.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.20.ffn_up.q_input:rscale" : tensor<f32> | |
%354 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.20.ffn_up.weight3Aqs = util.global.load @"__auto.blk.20.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%355 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.20.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.20.ffn_down.q_input:rscale" : tensor<f32> | |
%356 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.20.ffn_down.weight3Aqs = util.global.load @"__auto.blk.20.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%357 = torch_c.from_builtin_tensor %__auto.blk.20.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.21.attn_norm.weight = util.global.load @__auto.blk.21.attn_norm.weight : tensor<4096xbf16> | |
%358 = torch_c.from_builtin_tensor %__auto.blk.21.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.21.attn_q.q_input3Arscale = util.global.load @"__auto.blk.21.attn_q.q_input:rscale" : tensor<f32> | |
%359 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.21.attn_q.weight3Aqs = util.global.load @"__auto.blk.21.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%360 = torch_c.from_builtin_tensor %__auto.blk.21.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.21.attn_k.q_input3Arscale = util.global.load @"__auto.blk.21.attn_k.q_input:rscale" : tensor<f32> | |
%361 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.21.attn_k.weight3Aqs = util.global.load @"__auto.blk.21.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%362 = torch_c.from_builtin_tensor %__auto.blk.21.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.21.attn_v.q_input3Arscale = util.global.load @"__auto.blk.21.attn_v.q_input:rscale" : tensor<f32> | |
%363 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.21.attn_v.weight3Aqs = util.global.load @"__auto.blk.21.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%364 = torch_c.from_builtin_tensor %__auto.blk.21.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.21.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.21.kv_cache.quantizer:rscale" : tensor<f32> | |
%365 = torch_c.from_builtin_tensor %__auto.blk.21.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.21.attn_output.q_input3Arscale = util.global.load @"__auto.blk.21.attn_output.q_input:rscale" : tensor<f32> | |
%366 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.21.attn_output.weight3Aqs = util.global.load @"__auto.blk.21.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%367 = torch_c.from_builtin_tensor %__auto.blk.21.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.21.ffn_norm.weight = util.global.load @__auto.blk.21.ffn_norm.weight : tensor<4096xbf16> | |
%368 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.21.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.21.ffn_gate.q_input:rscale" : tensor<f32> | |
%369 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.21.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.21.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%370 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.21.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.21.ffn_up.q_input:rscale" : tensor<f32> | |
%371 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.21.ffn_up.weight3Aqs = util.global.load @"__auto.blk.21.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%372 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.21.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.21.ffn_down.q_input:rscale" : tensor<f32> | |
%373 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.21.ffn_down.weight3Aqs = util.global.load @"__auto.blk.21.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%374 = torch_c.from_builtin_tensor %__auto.blk.21.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.22.attn_norm.weight = util.global.load @__auto.blk.22.attn_norm.weight : tensor<4096xbf16> | |
%375 = torch_c.from_builtin_tensor %__auto.blk.22.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.22.attn_q.q_input3Arscale = util.global.load @"__auto.blk.22.attn_q.q_input:rscale" : tensor<f32> | |
%376 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.22.attn_q.weight3Aqs = util.global.load @"__auto.blk.22.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%377 = torch_c.from_builtin_tensor %__auto.blk.22.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.22.attn_k.q_input3Arscale = util.global.load @"__auto.blk.22.attn_k.q_input:rscale" : tensor<f32> | |
%378 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.22.attn_k.weight3Aqs = util.global.load @"__auto.blk.22.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%379 = torch_c.from_builtin_tensor %__auto.blk.22.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.22.attn_v.q_input3Arscale = util.global.load @"__auto.blk.22.attn_v.q_input:rscale" : tensor<f32> | |
%380 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.22.attn_v.weight3Aqs = util.global.load @"__auto.blk.22.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%381 = torch_c.from_builtin_tensor %__auto.blk.22.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.22.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.22.kv_cache.quantizer:rscale" : tensor<f32> | |
%382 = torch_c.from_builtin_tensor %__auto.blk.22.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.22.attn_output.q_input3Arscale = util.global.load @"__auto.blk.22.attn_output.q_input:rscale" : tensor<f32> | |
%383 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.22.attn_output.weight3Aqs = util.global.load @"__auto.blk.22.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%384 = torch_c.from_builtin_tensor %__auto.blk.22.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.22.ffn_norm.weight = util.global.load @__auto.blk.22.ffn_norm.weight : tensor<4096xbf16> | |
%385 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.22.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.22.ffn_gate.q_input:rscale" : tensor<f32> | |
%386 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.22.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.22.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%387 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.22.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.22.ffn_up.q_input:rscale" : tensor<f32> | |
%388 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.22.ffn_up.weight3Aqs = util.global.load @"__auto.blk.22.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%389 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.22.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.22.ffn_down.q_input:rscale" : tensor<f32> | |
%390 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.22.ffn_down.weight3Aqs = util.global.load @"__auto.blk.22.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%391 = torch_c.from_builtin_tensor %__auto.blk.22.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.23.attn_norm.weight = util.global.load @__auto.blk.23.attn_norm.weight : tensor<4096xbf16> | |
%392 = torch_c.from_builtin_tensor %__auto.blk.23.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.23.attn_q.q_input3Arscale = util.global.load @"__auto.blk.23.attn_q.q_input:rscale" : tensor<f32> | |
%393 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.23.attn_q.weight3Aqs = util.global.load @"__auto.blk.23.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%394 = torch_c.from_builtin_tensor %__auto.blk.23.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.23.attn_k.q_input3Arscale = util.global.load @"__auto.blk.23.attn_k.q_input:rscale" : tensor<f32> | |
%395 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.23.attn_k.weight3Aqs = util.global.load @"__auto.blk.23.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%396 = torch_c.from_builtin_tensor %__auto.blk.23.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.23.attn_v.q_input3Arscale = util.global.load @"__auto.blk.23.attn_v.q_input:rscale" : tensor<f32> | |
%397 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.23.attn_v.weight3Aqs = util.global.load @"__auto.blk.23.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%398 = torch_c.from_builtin_tensor %__auto.blk.23.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.23.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.23.kv_cache.quantizer:rscale" : tensor<f32> | |
%399 = torch_c.from_builtin_tensor %__auto.blk.23.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.23.attn_output.q_input3Arscale = util.global.load @"__auto.blk.23.attn_output.q_input:rscale" : tensor<f32> | |
%400 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.23.attn_output.weight3Aqs = util.global.load @"__auto.blk.23.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%401 = torch_c.from_builtin_tensor %__auto.blk.23.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.23.ffn_norm.weight = util.global.load @__auto.blk.23.ffn_norm.weight : tensor<4096xbf16> | |
%402 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.23.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.23.ffn_gate.q_input:rscale" : tensor<f32> | |
%403 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.23.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.23.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%404 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.23.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.23.ffn_up.q_input:rscale" : tensor<f32> | |
%405 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.23.ffn_up.weight3Aqs = util.global.load @"__auto.blk.23.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%406 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.23.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.23.ffn_down.q_input:rscale" : tensor<f32> | |
%407 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.23.ffn_down.weight3Aqs = util.global.load @"__auto.blk.23.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%408 = torch_c.from_builtin_tensor %__auto.blk.23.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.24.attn_norm.weight = util.global.load @__auto.blk.24.attn_norm.weight : tensor<4096xbf16> | |
%409 = torch_c.from_builtin_tensor %__auto.blk.24.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.24.attn_q.q_input3Arscale = util.global.load @"__auto.blk.24.attn_q.q_input:rscale" : tensor<f32> | |
%410 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.24.attn_q.weight3Aqs = util.global.load @"__auto.blk.24.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%411 = torch_c.from_builtin_tensor %__auto.blk.24.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.24.attn_k.q_input3Arscale = util.global.load @"__auto.blk.24.attn_k.q_input:rscale" : tensor<f32> | |
%412 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.24.attn_k.weight3Aqs = util.global.load @"__auto.blk.24.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%413 = torch_c.from_builtin_tensor %__auto.blk.24.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.24.attn_v.q_input3Arscale = util.global.load @"__auto.blk.24.attn_v.q_input:rscale" : tensor<f32> | |
%414 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.24.attn_v.weight3Aqs = util.global.load @"__auto.blk.24.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%415 = torch_c.from_builtin_tensor %__auto.blk.24.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.24.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.24.kv_cache.quantizer:rscale" : tensor<f32> | |
%416 = torch_c.from_builtin_tensor %__auto.blk.24.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.24.attn_output.q_input3Arscale = util.global.load @"__auto.blk.24.attn_output.q_input:rscale" : tensor<f32> | |
%417 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.24.attn_output.weight3Aqs = util.global.load @"__auto.blk.24.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%418 = torch_c.from_builtin_tensor %__auto.blk.24.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.24.ffn_norm.weight = util.global.load @__auto.blk.24.ffn_norm.weight : tensor<4096xbf16> | |
%419 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.24.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.24.ffn_gate.q_input:rscale" : tensor<f32> | |
%420 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.24.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.24.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%421 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.24.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.24.ffn_up.q_input:rscale" : tensor<f32> | |
%422 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.24.ffn_up.weight3Aqs = util.global.load @"__auto.blk.24.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%423 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.24.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.24.ffn_down.q_input:rscale" : tensor<f32> | |
%424 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.24.ffn_down.weight3Aqs = util.global.load @"__auto.blk.24.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%425 = torch_c.from_builtin_tensor %__auto.blk.24.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.25.attn_norm.weight = util.global.load @__auto.blk.25.attn_norm.weight : tensor<4096xbf16> | |
%426 = torch_c.from_builtin_tensor %__auto.blk.25.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.25.attn_q.q_input3Arscale = util.global.load @"__auto.blk.25.attn_q.q_input:rscale" : tensor<f32> | |
%427 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.25.attn_q.weight3Aqs = util.global.load @"__auto.blk.25.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%428 = torch_c.from_builtin_tensor %__auto.blk.25.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.25.attn_k.q_input3Arscale = util.global.load @"__auto.blk.25.attn_k.q_input:rscale" : tensor<f32> | |
%429 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.25.attn_k.weight3Aqs = util.global.load @"__auto.blk.25.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%430 = torch_c.from_builtin_tensor %__auto.blk.25.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.25.attn_v.q_input3Arscale = util.global.load @"__auto.blk.25.attn_v.q_input:rscale" : tensor<f32> | |
%431 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.25.attn_v.weight3Aqs = util.global.load @"__auto.blk.25.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%432 = torch_c.from_builtin_tensor %__auto.blk.25.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.25.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.25.kv_cache.quantizer:rscale" : tensor<f32> | |
%433 = torch_c.from_builtin_tensor %__auto.blk.25.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.25.attn_output.q_input3Arscale = util.global.load @"__auto.blk.25.attn_output.q_input:rscale" : tensor<f32> | |
%434 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.25.attn_output.weight3Aqs = util.global.load @"__auto.blk.25.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%435 = torch_c.from_builtin_tensor %__auto.blk.25.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.25.ffn_norm.weight = util.global.load @__auto.blk.25.ffn_norm.weight : tensor<4096xbf16> | |
%436 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.25.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.25.ffn_gate.q_input:rscale" : tensor<f32> | |
%437 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.25.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.25.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%438 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.25.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.25.ffn_up.q_input:rscale" : tensor<f32> | |
%439 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.25.ffn_up.weight3Aqs = util.global.load @"__auto.blk.25.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%440 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.25.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.25.ffn_down.q_input:rscale" : tensor<f32> | |
%441 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.25.ffn_down.weight3Aqs = util.global.load @"__auto.blk.25.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%442 = torch_c.from_builtin_tensor %__auto.blk.25.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.26.attn_norm.weight = util.global.load @__auto.blk.26.attn_norm.weight : tensor<4096xbf16> | |
%443 = torch_c.from_builtin_tensor %__auto.blk.26.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.26.attn_q.q_input3Arscale = util.global.load @"__auto.blk.26.attn_q.q_input:rscale" : tensor<f32> | |
%444 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.26.attn_q.weight3Aqs = util.global.load @"__auto.blk.26.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%445 = torch_c.from_builtin_tensor %__auto.blk.26.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.26.attn_k.q_input3Arscale = util.global.load @"__auto.blk.26.attn_k.q_input:rscale" : tensor<f32> | |
%446 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.26.attn_k.weight3Aqs = util.global.load @"__auto.blk.26.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%447 = torch_c.from_builtin_tensor %__auto.blk.26.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.26.attn_v.q_input3Arscale = util.global.load @"__auto.blk.26.attn_v.q_input:rscale" : tensor<f32> | |
%448 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.26.attn_v.weight3Aqs = util.global.load @"__auto.blk.26.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%449 = torch_c.from_builtin_tensor %__auto.blk.26.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.26.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.26.kv_cache.quantizer:rscale" : tensor<f32> | |
%450 = torch_c.from_builtin_tensor %__auto.blk.26.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.26.attn_output.q_input3Arscale = util.global.load @"__auto.blk.26.attn_output.q_input:rscale" : tensor<f32> | |
%451 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.26.attn_output.weight3Aqs = util.global.load @"__auto.blk.26.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%452 = torch_c.from_builtin_tensor %__auto.blk.26.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.26.ffn_norm.weight = util.global.load @__auto.blk.26.ffn_norm.weight : tensor<4096xbf16> | |
%453 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.26.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.26.ffn_gate.q_input:rscale" : tensor<f32> | |
%454 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.26.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.26.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%455 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.26.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.26.ffn_up.q_input:rscale" : tensor<f32> | |
%456 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.26.ffn_up.weight3Aqs = util.global.load @"__auto.blk.26.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%457 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.26.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.26.ffn_down.q_input:rscale" : tensor<f32> | |
%458 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.26.ffn_down.weight3Aqs = util.global.load @"__auto.blk.26.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%459 = torch_c.from_builtin_tensor %__auto.blk.26.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.27.attn_norm.weight = util.global.load @__auto.blk.27.attn_norm.weight : tensor<4096xbf16> | |
%460 = torch_c.from_builtin_tensor %__auto.blk.27.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.27.attn_q.q_input3Arscale = util.global.load @"__auto.blk.27.attn_q.q_input:rscale" : tensor<f32> | |
%461 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.27.attn_q.weight3Aqs = util.global.load @"__auto.blk.27.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%462 = torch_c.from_builtin_tensor %__auto.blk.27.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.27.attn_k.q_input3Arscale = util.global.load @"__auto.blk.27.attn_k.q_input:rscale" : tensor<f32> | |
%463 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.27.attn_k.weight3Aqs = util.global.load @"__auto.blk.27.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%464 = torch_c.from_builtin_tensor %__auto.blk.27.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.27.attn_v.q_input3Arscale = util.global.load @"__auto.blk.27.attn_v.q_input:rscale" : tensor<f32> | |
%465 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.27.attn_v.weight3Aqs = util.global.load @"__auto.blk.27.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%466 = torch_c.from_builtin_tensor %__auto.blk.27.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.27.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.27.kv_cache.quantizer:rscale" : tensor<f32> | |
%467 = torch_c.from_builtin_tensor %__auto.blk.27.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.27.attn_output.q_input3Arscale = util.global.load @"__auto.blk.27.attn_output.q_input:rscale" : tensor<f32> | |
%468 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.27.attn_output.weight3Aqs = util.global.load @"__auto.blk.27.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%469 = torch_c.from_builtin_tensor %__auto.blk.27.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.27.ffn_norm.weight = util.global.load @__auto.blk.27.ffn_norm.weight : tensor<4096xbf16> | |
%470 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.27.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.27.ffn_gate.q_input:rscale" : tensor<f32> | |
%471 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.27.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.27.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%472 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.27.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.27.ffn_up.q_input:rscale" : tensor<f32> | |
%473 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.27.ffn_up.weight3Aqs = util.global.load @"__auto.blk.27.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%474 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.27.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.27.ffn_down.q_input:rscale" : tensor<f32> | |
%475 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.27.ffn_down.weight3Aqs = util.global.load @"__auto.blk.27.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%476 = torch_c.from_builtin_tensor %__auto.blk.27.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.28.attn_norm.weight = util.global.load @__auto.blk.28.attn_norm.weight : tensor<4096xbf16> | |
%477 = torch_c.from_builtin_tensor %__auto.blk.28.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.28.attn_q.q_input3Arscale = util.global.load @"__auto.blk.28.attn_q.q_input:rscale" : tensor<f32> | |
%478 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.28.attn_q.weight3Aqs = util.global.load @"__auto.blk.28.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%479 = torch_c.from_builtin_tensor %__auto.blk.28.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.28.attn_k.q_input3Arscale = util.global.load @"__auto.blk.28.attn_k.q_input:rscale" : tensor<f32> | |
%480 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.28.attn_k.weight3Aqs = util.global.load @"__auto.blk.28.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%481 = torch_c.from_builtin_tensor %__auto.blk.28.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.28.attn_v.q_input3Arscale = util.global.load @"__auto.blk.28.attn_v.q_input:rscale" : tensor<f32> | |
%482 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.28.attn_v.weight3Aqs = util.global.load @"__auto.blk.28.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%483 = torch_c.from_builtin_tensor %__auto.blk.28.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.28.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.28.kv_cache.quantizer:rscale" : tensor<f32> | |
%484 = torch_c.from_builtin_tensor %__auto.blk.28.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.28.attn_output.q_input3Arscale = util.global.load @"__auto.blk.28.attn_output.q_input:rscale" : tensor<f32> | |
%485 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.28.attn_output.weight3Aqs = util.global.load @"__auto.blk.28.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%486 = torch_c.from_builtin_tensor %__auto.blk.28.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.28.ffn_norm.weight = util.global.load @__auto.blk.28.ffn_norm.weight : tensor<4096xbf16> | |
%487 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.28.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.28.ffn_gate.q_input:rscale" : tensor<f32> | |
%488 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.28.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.28.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%489 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.28.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.28.ffn_up.q_input:rscale" : tensor<f32> | |
%490 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.28.ffn_up.weight3Aqs = util.global.load @"__auto.blk.28.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%491 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.28.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.28.ffn_down.q_input:rscale" : tensor<f32> | |
%492 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.28.ffn_down.weight3Aqs = util.global.load @"__auto.blk.28.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%493 = torch_c.from_builtin_tensor %__auto.blk.28.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.29.attn_norm.weight = util.global.load @__auto.blk.29.attn_norm.weight : tensor<4096xbf16> | |
%494 = torch_c.from_builtin_tensor %__auto.blk.29.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.29.attn_q.q_input3Arscale = util.global.load @"__auto.blk.29.attn_q.q_input:rscale" : tensor<f32> | |
%495 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.29.attn_q.weight3Aqs = util.global.load @"__auto.blk.29.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%496 = torch_c.from_builtin_tensor %__auto.blk.29.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.29.attn_k.q_input3Arscale = util.global.load @"__auto.blk.29.attn_k.q_input:rscale" : tensor<f32> | |
%497 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.29.attn_k.weight3Aqs = util.global.load @"__auto.blk.29.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%498 = torch_c.from_builtin_tensor %__auto.blk.29.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.29.attn_v.q_input3Arscale = util.global.load @"__auto.blk.29.attn_v.q_input:rscale" : tensor<f32> | |
%499 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.29.attn_v.weight3Aqs = util.global.load @"__auto.blk.29.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%500 = torch_c.from_builtin_tensor %__auto.blk.29.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.29.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.29.kv_cache.quantizer:rscale" : tensor<f32> | |
%501 = torch_c.from_builtin_tensor %__auto.blk.29.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.29.attn_output.q_input3Arscale = util.global.load @"__auto.blk.29.attn_output.q_input:rscale" : tensor<f32> | |
%502 = torch_c.from_builtin_tensor %__auto.blk.29.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.29.attn_output.weight3Aqs = util.global.load @"__auto.blk.29.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%503 = torch_c.from_builtin_tensor %__auto.blk.29.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.29.ffn_norm.weight = util.global.load @__auto.blk.29.ffn_norm.weight : tensor<4096xbf16> | |
%504 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.29.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.29.ffn_gate.q_input:rscale" : tensor<f32> | |
%505 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.29.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.29.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%506 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.29.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.29.ffn_up.q_input:rscale" : tensor<f32> | |
%507 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.29.ffn_up.weight3Aqs = util.global.load @"__auto.blk.29.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%508 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.29.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.29.ffn_down.q_input:rscale" : tensor<f32> | |
%509 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.29.ffn_down.weight3Aqs = util.global.load @"__auto.blk.29.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%510 = torch_c.from_builtin_tensor %__auto.blk.29.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.30.attn_norm.weight = util.global.load @__auto.blk.30.attn_norm.weight : tensor<4096xbf16> | |
%511 = torch_c.from_builtin_tensor %__auto.blk.30.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.30.attn_q.q_input3Arscale = util.global.load @"__auto.blk.30.attn_q.q_input:rscale" : tensor<f32> | |
%512 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.30.attn_q.weight3Aqs = util.global.load @"__auto.blk.30.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%513 = torch_c.from_builtin_tensor %__auto.blk.30.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.30.attn_k.q_input3Arscale = util.global.load @"__auto.blk.30.attn_k.q_input:rscale" : tensor<f32> | |
%514 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.30.attn_k.weight3Aqs = util.global.load @"__auto.blk.30.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%515 = torch_c.from_builtin_tensor %__auto.blk.30.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.30.attn_v.q_input3Arscale = util.global.load @"__auto.blk.30.attn_v.q_input:rscale" : tensor<f32> | |
%516 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.30.attn_v.weight3Aqs = util.global.load @"__auto.blk.30.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%517 = torch_c.from_builtin_tensor %__auto.blk.30.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.30.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.30.kv_cache.quantizer:rscale" : tensor<f32> | |
%518 = torch_c.from_builtin_tensor %__auto.blk.30.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.30.attn_output.q_input3Arscale = util.global.load @"__auto.blk.30.attn_output.q_input:rscale" : tensor<f32> | |
%519 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.30.attn_output.weight3Aqs = util.global.load @"__auto.blk.30.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%520 = torch_c.from_builtin_tensor %__auto.blk.30.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.30.ffn_norm.weight = util.global.load @__auto.blk.30.ffn_norm.weight : tensor<4096xbf16> | |
%521 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.30.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.30.ffn_gate.q_input:rscale" : tensor<f32> | |
%522 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.30.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.30.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%523 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.30.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.30.ffn_up.q_input:rscale" : tensor<f32> | |
%524 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.30.ffn_up.weight3Aqs = util.global.load @"__auto.blk.30.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%525 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.30.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.30.ffn_down.q_input:rscale" : tensor<f32> | |
%526 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.30.ffn_down.weight3Aqs = util.global.load @"__auto.blk.30.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%527 = torch_c.from_builtin_tensor %__auto.blk.30.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.blk.31.attn_norm.weight = util.global.load @__auto.blk.31.attn_norm.weight : tensor<4096xbf16> | |
%528 = torch_c.from_builtin_tensor %__auto.blk.31.attn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.31.attn_q.q_input3Arscale = util.global.load @"__auto.blk.31.attn_q.q_input:rscale" : tensor<f32> | |
%529 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.31.attn_q.weight3Aqs = util.global.load @"__auto.blk.31.attn_q.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%530 = torch_c.from_builtin_tensor %__auto.blk.31.attn_q.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.31.attn_k.q_input3Arscale = util.global.load @"__auto.blk.31.attn_k.q_input:rscale" : tensor<f32> | |
%531 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.31.attn_k.weight3Aqs = util.global.load @"__auto.blk.31.attn_k.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%532 = torch_c.from_builtin_tensor %__auto.blk.31.attn_k.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.31.attn_v.q_input3Arscale = util.global.load @"__auto.blk.31.attn_v.q_input:rscale" : tensor<f32> | |
%533 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.31.attn_v.weight3Aqs = util.global.load @"__auto.blk.31.attn_v.weight:qs" : tensor<1024x4096xf8E4M3FNUZ> | |
%534 = torch_c.from_builtin_tensor %__auto.blk.31.attn_v.weight3Aqs : tensor<1024x4096xf8E4M3FNUZ> -> !torch.vtensor<[1024,4096],f8E4M3FNUZ> | |
%__auto.blk.31.kv_cache.quantizer3Arscale = util.global.load @"__auto.blk.31.kv_cache.quantizer:rscale" : tensor<f32> | |
%535 = torch_c.from_builtin_tensor %__auto.blk.31.kv_cache.quantizer3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.31.attn_output.q_input3Arscale = util.global.load @"__auto.blk.31.attn_output.q_input:rscale" : tensor<f32> | |
%536 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.31.attn_output.weight3Aqs = util.global.load @"__auto.blk.31.attn_output.weight:qs" : tensor<4096x4096xf8E4M3FNUZ> | |
%537 = torch_c.from_builtin_tensor %__auto.blk.31.attn_output.weight3Aqs : tensor<4096x4096xf8E4M3FNUZ> -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%__auto.blk.31.ffn_norm.weight = util.global.load @__auto.blk.31.ffn_norm.weight : tensor<4096xbf16> | |
%538 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.blk.31.ffn_gate.q_input3Arscale = util.global.load @"__auto.blk.31.ffn_gate.q_input:rscale" : tensor<f32> | |
%539 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.31.ffn_gate.weight3Aqs = util.global.load @"__auto.blk.31.ffn_gate.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%540 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_gate.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.31.ffn_up.q_input3Arscale = util.global.load @"__auto.blk.31.ffn_up.q_input:rscale" : tensor<f32> | |
%541 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.31.ffn_up.weight3Aqs = util.global.load @"__auto.blk.31.ffn_up.weight:qs" : tensor<14336x4096xf8E4M3FNUZ> | |
%542 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_up.weight3Aqs : tensor<14336x4096xf8E4M3FNUZ> -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%__auto.blk.31.ffn_down.q_input3Arscale = util.global.load @"__auto.blk.31.ffn_down.q_input:rscale" : tensor<f32> | |
%543 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.q_input3Arscale : tensor<f32> -> !torch.vtensor<[],f32> | |
%__auto.blk.31.ffn_down.weight3Aqs = util.global.load @"__auto.blk.31.ffn_down.weight:qs" : tensor<4096x14336xf8E4M3FNUZ> | |
%544 = torch_c.from_builtin_tensor %__auto.blk.31.ffn_down.weight3Aqs : tensor<4096x14336xf8E4M3FNUZ> -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%__auto.output_norm.weight = util.global.load @__auto.output_norm.weight : tensor<4096xbf16> | |
%545 = torch_c.from_builtin_tensor %__auto.output_norm.weight : tensor<4096xbf16> -> !torch.vtensor<[4096],bf16> | |
%__auto.output.weight = util.global.load @__auto.output.weight : tensor<128256x4096xbf16> | |
%546 = torch_c.from_builtin_tensor %__auto.output.weight : tensor<128256x4096xbf16> -> !torch.vtensor<[128256,4096],bf16> | |
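    // --- End of parameter loads. The function body begins below: %arg3 (an f8E4M3FNUZ buffer whose row size of 2097152 appears to be the paged KV-cache storage) is copied to a vtensor, and the dynamic sequence dimensions are bound to the symbolic ints "s1" and "s2".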
%547 = torch.copy.to_vtensor %arg3 : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
%548 = torch.symbolic_int "s1" {min_val = 2, max_val = 4095} : !torch.int | |
%549 = torch.symbolic_int "s2" {min_val = 2, max_val = 9223372036854775806} : !torch.int | |
torch.bind_symbolic_shape %arg0, [%548], affine_map<()[s0] -> (1, s0 * 32)> : !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %arg2, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %547, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
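    // Block 0 prologue: embed the input token ids into bf16, then apply RMSNorm in f32 (mean of squares along the last dim, add eps 1e-05, rsqrt) and scale by blk.0.attn_norm.weight.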
%int-1 = torch.constant.int -1 | |
%false = torch.constant.bool false | |
%false_0 = torch.constant.bool false | |
%550 = torch.aten.embedding %0, %arg0, %int-1, %false, %false_0 : !torch.vtensor<[128256,4096],bf16>, !torch.vtensor<[1,?],si64>, !torch.int, !torch.bool, !torch.bool -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %550, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int6 = torch.constant.int 6 | |
%551 = torch.prims.convert_element_type %550, %int6 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %551, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2 = torch.constant.int 2 | |
%552 = torch.aten.pow.Tensor_Scalar %551, %int2 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %552, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_1 = torch.constant.int -1 | |
%553 = torch.prim.ListConstruct %int-1_1 : (!torch.int) -> !torch.list<int> | |
%true = torch.constant.bool true | |
%none = torch.constant.none | |
%554 = torch.aten.mean.dim %552, %553, %true, %none : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %554, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05 = torch.constant.float 1.000000e-05 | |
%int1 = torch.constant.int 1 | |
%555 = torch.aten.add.Scalar %554, %float1.000000e-05, %int1 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %555, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%556 = torch.aten.rsqrt %555 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %556, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%557 = torch.aten.mul.Tensor %551, %556 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %557, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15 = torch.constant.int 15 | |
%558 = torch.prims.convert_element_type %557, %int15 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %558, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%559 = torch.aten.mul.Tensor %1, %558 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %559, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
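    // Static f8 quantization of the attention inputs: the normalized activations are divided by the per-tensor q_input rscale, clamped to the f8E4M3FNUZ range [-240.0, 240.0], and cast to f8 before the mm against the transposed f8 weight; the operand is flattened to [seq, 4096] for the mm, reshaped back, and converted to bf16. The same pattern repeats below for the K and V projections.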
%560 = torch.aten.div.Tensor %559, %2 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %560, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02 = torch.constant.float -2.400000e+02 | |
%float2.400000e02 = torch.constant.float 2.400000e+02 | |
%561 = torch.aten.clamp %560, %float-2.400000e02, %float2.400000e02 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %561, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26 = torch.constant.int 26 | |
%562 = torch.prims.convert_element_type %561, %int26 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %562, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2 = torch.constant.int -2 | |
%int-1_2 = torch.constant.int -1 | |
%563 = torch.aten.transpose.int %3, %int-2, %int-1_2 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int1_3 = torch.constant.int 1 | |
%564 = torch.aten.size.int %arg0, %int1_3 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.int | |
%int4096 = torch.constant.int 4096 | |
%565 = torch.prim.ListConstruct %564, %int4096 : (!torch.int, !torch.int) -> !torch.list<int> | |
%566 = torch.aten.view %562, %565 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %566, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%567 = torch.aten.mm %566, %563 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %567, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_4 = torch.constant.int 1 | |
%int4096_5 = torch.constant.int 4096 | |
%568 = torch.prim.ListConstruct %int1_4, %564, %int4096_5 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%569 = torch.aten.view %567, %568 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %569, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_6 = torch.constant.int 15 | |
%570 = torch.prims.convert_element_type %569, %int15_6 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %570, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%571 = torch.aten.div.Tensor %559, %4 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %571, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_7 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_8 = torch.constant.float 2.400000e+02 | |
%572 = torch.aten.clamp %571, %float-2.400000e02_7, %float2.400000e02_8 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %572, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_9 = torch.constant.int 26 | |
%573 = torch.prims.convert_element_type %572, %int26_9 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %573, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_10 = torch.constant.int -2 | |
%int-1_11 = torch.constant.int -1 | |
%574 = torch.aten.transpose.int %5, %int-2_10, %int-1_11 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_12 = torch.constant.int 4096 | |
%575 = torch.prim.ListConstruct %564, %int4096_12 : (!torch.int, !torch.int) -> !torch.list<int> | |
%576 = torch.aten.view %573, %575 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %576, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%577 = torch.aten.mm %576, %574 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %577, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_13 = torch.constant.int 1 | |
%int1024 = torch.constant.int 1024 | |
%578 = torch.prim.ListConstruct %int1_13, %564, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%579 = torch.aten.view %577, %578 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %579, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_14 = torch.constant.int 15 | |
%580 = torch.prims.convert_element_type %579, %int15_14 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %580, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
%581 = torch.aten.div.Tensor %559, %6 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %581, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_15 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_16 = torch.constant.float 2.400000e+02 | |
%582 = torch.aten.clamp %581, %float-2.400000e02_15, %float2.400000e02_16 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %582, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_17 = torch.constant.int 26 | |
%583 = torch.prims.convert_element_type %582, %int26_17 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %583, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_18 = torch.constant.int -2 | |
%int-1_19 = torch.constant.int -1 | |
%584 = torch.aten.transpose.int %7, %int-2_18, %int-1_19 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_20 = torch.constant.int 4096 | |
%585 = torch.prim.ListConstruct %564, %int4096_20 : (!torch.int, !torch.int) -> !torch.list<int> | |
%586 = torch.aten.view %583, %585 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %586, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%587 = torch.aten.mm %586, %584 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %587, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_21 = torch.constant.int 1 | |
%int1024_22 = torch.constant.int 1024 | |
%588 = torch.prim.ListConstruct %int1_21, %564, %int1024_22 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%589 = torch.aten.view %587, %588 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %589, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_23 = torch.constant.int 15 | |
%590 = torch.prims.convert_element_type %589, %int15_23 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %590, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
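// Reshape Q, K and V into per-head layout: 32 query heads and 8 KV heads, head dimension 128.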
%int1_24 = torch.constant.int 1 | |
%int32 = torch.constant.int 32 | |
%int128 = torch.constant.int 128 | |
%591 = torch.prim.ListConstruct %int1_24, %564, %int32, %int128 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%592 = torch.aten.view %570, %591 : !torch.vtensor<[1,?,4096],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %592, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_25 = torch.constant.int 1 | |
%int8 = torch.constant.int 8 | |
%int128_26 = torch.constant.int 128 | |
%593 = torch.prim.ListConstruct %int1_25, %564, %int8, %int128_26 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%594 = torch.aten.view %580, %593 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %594, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int1_27 = torch.constant.int 1 | |
%int8_28 = torch.constant.int 8 | |
%int128_29 = torch.constant.int 128 | |
%595 = torch.prim.ListConstruct %int1_27, %564, %int8_28, %int128_29 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%596 = torch.aten.view %590, %595 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %596, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
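// Build the RoPE position tables: frequencies with base 500000 over 131072 positions, with a
// wavelength-dependent rescale (factor 8, thresholds 2048/8192) consistent with Llama-3.1-style
// rope scaling, producing bf16 cos/sin tables.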
%int131072 = torch.constant.int 131072 | |
%none_30 = torch.constant.none | |
%none_31 = torch.constant.none | |
%cpu = torch.constant.device "cpu" | |
%false_32 = torch.constant.bool false | |
%597 = torch.aten.arange %int131072, %none_30, %none_31, %cpu, %false_32 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0 = torch.constant.int 0 | |
%int128_33 = torch.constant.int 128 | |
%int2_34 = torch.constant.int 2 | |
%int4 = torch.constant.int 4 | |
%none_35 = torch.constant.none | |
%cpu_36 = torch.constant.device "cpu" | |
%false_37 = torch.constant.bool false | |
%598 = torch.aten.arange.start_step %int0, %int128_33, %int2_34, %int4, %none_35, %cpu_36, %false_37 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_38 = torch.constant.int 6 | |
%599 = torch.prims.convert_element_type %598, %int6_38 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_39 = torch.constant.int 128 | |
%600 = torch.aten.div.Scalar %599, %int128_39 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05 = torch.constant.float 5.000000e+05 | |
%601 = torch.aten.pow.Scalar %float5.000000e05, %600 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%602 = torch.aten.reciprocal %601 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00 = torch.constant.float 1.000000e+00 | |
%603 = torch.aten.mul.Scalar %602, %float1.000000e00 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%604 = torch.aten.reciprocal %603 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00 = torch.constant.float 6.2831853071795862 | |
%605 = torch.aten.mul.Scalar %604, %float6.283190e00 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03 = torch.constant.float 8.192000e+03 | |
%606 = torch.aten.gt.Scalar %605, %float8.192000e03 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_40 = torch.constant.int 8 | |
%607 = torch.aten.div.Scalar %603, %int8_40 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%608 = torch.aten.where.self %606, %607, %603 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%609 = torch.aten.reciprocal %605 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192 = torch.constant.int 8192 | |
%610 = torch.aten.mul.Scalar %609, %int8192 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_41 = torch.constant.int 1 | |
%int1_42 = torch.constant.int 1 | |
%611 = torch.aten.sub.Scalar %610, %int1_41, %int1_42 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3 = torch.constant.int 3 | |
%612 = torch.aten.div.Scalar %611, %int3 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_43 = torch.constant.int 1 | |
%int1_44 = torch.constant.int 1 | |
%613 = torch.aten.rsub.Scalar %612, %int1_43, %int1_44 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%614 = torch.aten.mul.Tensor %613, %608 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_45 = torch.constant.int 8 | |
%615 = torch.aten.div.Scalar %614, %int8_45 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%616 = torch.aten.mul.Tensor %612, %608 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_46 = torch.constant.int 1 | |
%617 = torch.aten.add.Tensor %615, %616, %int1_46 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03 = torch.constant.float 2.048000e+03 | |
%618 = torch.aten.lt.Scalar %605, %float2.048000e03 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%619 = torch.aten.bitwise_not %618 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_47 = torch.constant.float 8.192000e+03 | |
%620 = torch.aten.gt.Scalar %605, %float8.192000e03_47 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%621 = torch.aten.bitwise_not %620 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%622 = torch.aten.mul.Tensor %619, %621 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%623 = torch.aten.where.self %622, %617, %608 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%624 = torch.prim.ListConstruct %623, %623 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_48 = torch.constant.int -1 | |
%625 = torch.aten.cat %624, %int-1_48 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_49 = torch.constant.int 6 | |
%626 = torch.prims.convert_element_type %597, %int6_49 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_50 = torch.constant.int 131072 | |
%int1_51 = torch.constant.int 1 | |
%627 = torch.prim.ListConstruct %int131072_50, %int1_51 : (!torch.int, !torch.int) -> !torch.list<int> | |
%628 = torch.aten.view %626, %627 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%629 = torch.aten.mul.Tensor %628, %625 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%630 = torch.aten.cos %629 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_52 = torch.constant.int 15 | |
%631 = torch.prims.convert_element_type %630, %int15_52 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%632 = torch.aten.sin %629 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_53 = torch.constant.int 15 | |
%633 = torch.prims.convert_element_type %632, %int15_53 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
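// Slice the cos/sin tables to the current sequence length and reshape to [1, 1, seq, 128]
// so they broadcast across the query heads.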
%int1_54 = torch.constant.int 1 | |
%634 = torch.aten.size.int %569, %int1_54 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_55 = torch.constant.int 0 | |
%635 = torch.aten.add.int %int0_55, %634 : !torch.int, !torch.int -> !torch.int | |
%int0_56 = torch.constant.int 0 | |
%int0_57 = torch.constant.int 0 | |
%int1_58 = torch.constant.int 1 | |
%636 = torch.aten.slice.Tensor %631, %int0_56, %int0_57, %635, %int1_58 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %636, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_59 = torch.constant.int 1 | |
%int0_60 = torch.constant.int 0 | |
%int9223372036854775807 = torch.constant.int 9223372036854775807 | |
%int1_61 = torch.constant.int 1 | |
%637 = torch.aten.slice.Tensor %636, %int1_59, %int0_60, %int9223372036854775807, %int1_61 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %637, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_62 = torch.constant.int 0 | |
%638 = torch.aten.add.int %int0_62, %634 : !torch.int, !torch.int -> !torch.int | |
%int0_63 = torch.constant.int 0 | |
%int0_64 = torch.constant.int 0 | |
%int1_65 = torch.constant.int 1 | |
%639 = torch.aten.slice.Tensor %633, %int0_63, %int0_64, %638, %int1_65 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %639, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_66 = torch.constant.int 1 | |
%int0_67 = torch.constant.int 0 | |
%int9223372036854775807_68 = torch.constant.int 9223372036854775807 | |
%int1_69 = torch.constant.int 1 | |
%640 = torch.aten.slice.Tensor %639, %int1_66, %int0_67, %int9223372036854775807_68, %int1_69 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %640, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_70 = torch.constant.int 0 | |
%641 = torch.aten.unsqueeze %637, %int0_70 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %641, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_71 = torch.constant.int 1 | |
%int0_72 = torch.constant.int 0 | |
%int9223372036854775807_73 = torch.constant.int 9223372036854775807 | |
%int1_74 = torch.constant.int 1 | |
%642 = torch.aten.slice.Tensor %641, %int1_71, %int0_72, %int9223372036854775807_73, %int1_74 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %642, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_75 = torch.constant.int 2 | |
%643 = torch.aten.unsqueeze %642, %int2_75 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %643, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_76 = torch.constant.int 3 | |
%int0_77 = torch.constant.int 0 | |
%int9223372036854775807_78 = torch.constant.int 9223372036854775807 | |
%int1_79 = torch.constant.int 1 | |
%644 = torch.aten.slice.Tensor %643, %int3_76, %int0_77, %int9223372036854775807_78, %int1_79 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %644, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_80 = torch.constant.int 0 | |
%645 = torch.aten.unsqueeze %640, %int0_80 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %645, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_81 = torch.constant.int 1 | |
%int0_82 = torch.constant.int 0 | |
%int9223372036854775807_83 = torch.constant.int 9223372036854775807 | |
%int1_84 = torch.constant.int 1 | |
%646 = torch.aten.slice.Tensor %645, %int1_81, %int0_82, %int9223372036854775807_83, %int1_84 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %646, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_85 = torch.constant.int 2 | |
%647 = torch.aten.unsqueeze %646, %int2_85 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %647, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_86 = torch.constant.int 3 | |
%int0_87 = torch.constant.int 0 | |
%int9223372036854775807_88 = torch.constant.int 9223372036854775807 | |
%int1_89 = torch.constant.int 1 | |
%648 = torch.aten.slice.Tensor %647, %int3_86, %int0_87, %int9223372036854775807_88, %int1_89 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %648, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_90 = torch.constant.int 1 | |
%int2_91 = torch.constant.int 2 | |
%649 = torch.aten.transpose.int %644, %int1_90, %int2_91 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %649, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_92 = torch.constant.int 1 | |
%int1_93 = torch.constant.int 1 | |
%int1_94 = torch.constant.int 1 | |
%int1_95 = torch.constant.int 1 | |
%650 = torch.prim.ListConstruct %int1_92, %int1_93, %int1_94, %int1_95 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%651 = torch.aten.repeat %649, %650 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %651, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_96 = torch.constant.int 1 | |
%int2_97 = torch.constant.int 2 | |
%652 = torch.aten.transpose.int %648, %int1_96, %int2_97 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %652, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
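// Apply rotary embedding to Q with the rotate-half formulation: q * cos + rotate_half(q) * sin.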
%int1_98 = torch.constant.int 1 | |
%int2_99 = torch.constant.int 2 | |
%653 = torch.aten.transpose.int %592, %int1_98, %int2_99 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %653, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_100 = torch.constant.int 1 | |
%int1_101 = torch.constant.int 1 | |
%int1_102 = torch.constant.int 1 | |
%int1_103 = torch.constant.int 1 | |
%654 = torch.prim.ListConstruct %int1_100, %int1_101, %int1_102, %int1_103 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%655 = torch.aten.repeat %652, %654 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %655, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%656 = torch.aten.mul.Tensor %653, %651 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %656, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int3_104 = torch.constant.int 3 | |
%int0_105 = torch.constant.int 0 | |
%int64 = torch.constant.int 64 | |
%int1_106 = torch.constant.int 1 | |
%657 = torch.aten.slice.Tensor %653, %int3_104, %int0_105, %int64, %int1_106 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %657, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%int3_107 = torch.constant.int 3 | |
%int64_108 = torch.constant.int 64 | |
%int9223372036854775807_109 = torch.constant.int 9223372036854775807 | |
%int1_110 = torch.constant.int 1 | |
%658 = torch.aten.slice.Tensor %653, %int3_107, %int64_108, %int9223372036854775807_109, %int1_110 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %658, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%659 = torch.aten.neg %658 : !torch.vtensor<[1,32,?,64],bf16> -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %659, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%660 = torch.prim.ListConstruct %659, %657 : (!torch.vtensor<[1,32,?,64],bf16>, !torch.vtensor<[1,32,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_111 = torch.constant.int -1 | |
%661 = torch.aten.cat %660, %int-1_111 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %661, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%662 = torch.aten.mul.Tensor %661, %655 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %662, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_112 = torch.constant.int 1 | |
%663 = torch.aten.add.Tensor %656, %662, %int1_112 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %663, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_113 = torch.constant.int 1 | |
%int2_114 = torch.constant.int 2 | |
%664 = torch.aten.transpose.int %663, %int1_113, %int2_114 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %664, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
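// Recompute the same RoPE cos/sin tables for the K path.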
%int131072_115 = torch.constant.int 131072 | |
%none_116 = torch.constant.none | |
%none_117 = torch.constant.none | |
%cpu_118 = torch.constant.device "cpu" | |
%false_119 = torch.constant.bool false | |
%665 = torch.aten.arange %int131072_115, %none_116, %none_117, %cpu_118, %false_119 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_120 = torch.constant.int 0 | |
%int128_121 = torch.constant.int 128 | |
%int2_122 = torch.constant.int 2 | |
%int4_123 = torch.constant.int 4 | |
%none_124 = torch.constant.none | |
%cpu_125 = torch.constant.device "cpu" | |
%false_126 = torch.constant.bool false | |
%666 = torch.aten.arange.start_step %int0_120, %int128_121, %int2_122, %int4_123, %none_124, %cpu_125, %false_126 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_127 = torch.constant.int 6 | |
%667 = torch.prims.convert_element_type %666, %int6_127 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_128 = torch.constant.int 128 | |
%668 = torch.aten.div.Scalar %667, %int128_128 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_129 = torch.constant.float 5.000000e+05 | |
%669 = torch.aten.pow.Scalar %float5.000000e05_129, %668 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%670 = torch.aten.reciprocal %669 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_130 = torch.constant.float 1.000000e+00 | |
%671 = torch.aten.mul.Scalar %670, %float1.000000e00_130 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%672 = torch.aten.reciprocal %671 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_131 = torch.constant.float 6.2831853071795862 | |
%673 = torch.aten.mul.Scalar %672, %float6.283190e00_131 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_132 = torch.constant.float 8.192000e+03 | |
%674 = torch.aten.gt.Scalar %673, %float8.192000e03_132 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_133 = torch.constant.int 8 | |
%675 = torch.aten.div.Scalar %671, %int8_133 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%676 = torch.aten.where.self %674, %675, %671 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%677 = torch.aten.reciprocal %673 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_134 = torch.constant.int 8192 | |
%678 = torch.aten.mul.Scalar %677, %int8192_134 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_135 = torch.constant.int 1 | |
%int1_136 = torch.constant.int 1 | |
%679 = torch.aten.sub.Scalar %678, %int1_135, %int1_136 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_137 = torch.constant.int 3 | |
%680 = torch.aten.div.Scalar %679, %int3_137 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_138 = torch.constant.int 1 | |
%int1_139 = torch.constant.int 1 | |
%681 = torch.aten.rsub.Scalar %680, %int1_138, %int1_139 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%682 = torch.aten.mul.Tensor %681, %676 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_140 = torch.constant.int 8 | |
%683 = torch.aten.div.Scalar %682, %int8_140 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%684 = torch.aten.mul.Tensor %680, %676 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_141 = torch.constant.int 1 | |
%685 = torch.aten.add.Tensor %683, %684, %int1_141 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_142 = torch.constant.float 2.048000e+03 | |
%686 = torch.aten.lt.Scalar %673, %float2.048000e03_142 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%687 = torch.aten.bitwise_not %686 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_143 = torch.constant.float 8.192000e+03 | |
%688 = torch.aten.gt.Scalar %673, %float8.192000e03_143 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%689 = torch.aten.bitwise_not %688 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%690 = torch.aten.mul.Tensor %687, %689 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%691 = torch.aten.where.self %690, %685, %676 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%692 = torch.prim.ListConstruct %691, %691 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_144 = torch.constant.int -1 | |
%693 = torch.aten.cat %692, %int-1_144 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_145 = torch.constant.int 6 | |
%694 = torch.prims.convert_element_type %665, %int6_145 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_146 = torch.constant.int 131072 | |
%int1_147 = torch.constant.int 1 | |
%695 = torch.prim.ListConstruct %int131072_146, %int1_147 : (!torch.int, !torch.int) -> !torch.list<int> | |
%696 = torch.aten.view %694, %695 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%697 = torch.aten.mul.Tensor %696, %693 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%698 = torch.aten.cos %697 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_148 = torch.constant.int 15 | |
%699 = torch.prims.convert_element_type %698, %int15_148 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%700 = torch.aten.sin %697 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_149 = torch.constant.int 15 | |
%701 = torch.prims.convert_element_type %700, %int15_149 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
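// Slice the K-path cos/sin tables to the sequence length and broadcast to [1, 1, seq, 128].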
%int1_150 = torch.constant.int 1 | |
%702 = torch.aten.size.int %579, %int1_150 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_151 = torch.constant.int 0 | |
%703 = torch.aten.add.int %int0_151, %702 : !torch.int, !torch.int -> !torch.int | |
%int0_152 = torch.constant.int 0 | |
%int0_153 = torch.constant.int 0 | |
%int1_154 = torch.constant.int 1 | |
%704 = torch.aten.slice.Tensor %699, %int0_152, %int0_153, %703, %int1_154 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %704, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_155 = torch.constant.int 1 | |
%int0_156 = torch.constant.int 0 | |
%int9223372036854775807_157 = torch.constant.int 9223372036854775807 | |
%int1_158 = torch.constant.int 1 | |
%705 = torch.aten.slice.Tensor %704, %int1_155, %int0_156, %int9223372036854775807_157, %int1_158 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %705, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_159 = torch.constant.int 0 | |
%706 = torch.aten.add.int %int0_159, %702 : !torch.int, !torch.int -> !torch.int | |
%int0_160 = torch.constant.int 0 | |
%int0_161 = torch.constant.int 0 | |
%int1_162 = torch.constant.int 1 | |
%707 = torch.aten.slice.Tensor %701, %int0_160, %int0_161, %706, %int1_162 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %707, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_163 = torch.constant.int 1 | |
%int0_164 = torch.constant.int 0 | |
%int9223372036854775807_165 = torch.constant.int 9223372036854775807 | |
%int1_166 = torch.constant.int 1 | |
%708 = torch.aten.slice.Tensor %707, %int1_163, %int0_164, %int9223372036854775807_165, %int1_166 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %708, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_167 = torch.constant.int 0 | |
%709 = torch.aten.unsqueeze %705, %int0_167 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %709, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_168 = torch.constant.int 1 | |
%int0_169 = torch.constant.int 0 | |
%int9223372036854775807_170 = torch.constant.int 9223372036854775807 | |
%int1_171 = torch.constant.int 1 | |
%710 = torch.aten.slice.Tensor %709, %int1_168, %int0_169, %int9223372036854775807_170, %int1_171 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %710, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_172 = torch.constant.int 2 | |
%711 = torch.aten.unsqueeze %710, %int2_172 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %711, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_173 = torch.constant.int 3 | |
%int0_174 = torch.constant.int 0 | |
%int9223372036854775807_175 = torch.constant.int 9223372036854775807 | |
%int1_176 = torch.constant.int 1 | |
%712 = torch.aten.slice.Tensor %711, %int3_173, %int0_174, %int9223372036854775807_175, %int1_176 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %712, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_177 = torch.constant.int 0 | |
%713 = torch.aten.unsqueeze %708, %int0_177 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %713, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_178 = torch.constant.int 1 | |
%int0_179 = torch.constant.int 0 | |
%int9223372036854775807_180 = torch.constant.int 9223372036854775807 | |
%int1_181 = torch.constant.int 1 | |
%714 = torch.aten.slice.Tensor %713, %int1_178, %int0_179, %int9223372036854775807_180, %int1_181 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %714, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_182 = torch.constant.int 2 | |
%715 = torch.aten.unsqueeze %714, %int2_182 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %715, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_183 = torch.constant.int 3 | |
%int0_184 = torch.constant.int 0 | |
%int9223372036854775807_185 = torch.constant.int 9223372036854775807 | |
%int1_186 = torch.constant.int 1 | |
%716 = torch.aten.slice.Tensor %715, %int3_183, %int0_184, %int9223372036854775807_185, %int1_186 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %716, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_187 = torch.constant.int 1 | |
%int2_188 = torch.constant.int 2 | |
%717 = torch.aten.transpose.int %712, %int1_187, %int2_188 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %717, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_189 = torch.constant.int 1 | |
%int1_190 = torch.constant.int 1 | |
%int1_191 = torch.constant.int 1 | |
%int1_192 = torch.constant.int 1 | |
%718 = torch.prim.ListConstruct %int1_189, %int1_190, %int1_191, %int1_192 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%719 = torch.aten.repeat %717, %718 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %719, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_193 = torch.constant.int 1 | |
%int2_194 = torch.constant.int 2 | |
%720 = torch.aten.transpose.int %716, %int1_193, %int2_194 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %720, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
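// Apply rotary embedding to K (8 KV heads): k * cos + rotate_half(k) * sin.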
%int1_195 = torch.constant.int 1 | |
%int2_196 = torch.constant.int 2 | |
%721 = torch.aten.transpose.int %594, %int1_195, %int2_196 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %721, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_197 = torch.constant.int 1 | |
%int1_198 = torch.constant.int 1 | |
%int1_199 = torch.constant.int 1 | |
%int1_200 = torch.constant.int 1 | |
%722 = torch.prim.ListConstruct %int1_197, %int1_198, %int1_199, %int1_200 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%723 = torch.aten.repeat %720, %722 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %723, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%724 = torch.aten.mul.Tensor %721, %719 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %724, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int3_201 = torch.constant.int 3 | |
%int0_202 = torch.constant.int 0 | |
%int64_203 = torch.constant.int 64 | |
%int1_204 = torch.constant.int 1 | |
%725 = torch.aten.slice.Tensor %721, %int3_201, %int0_202, %int64_203, %int1_204 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %725, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%int3_205 = torch.constant.int 3 | |
%int64_206 = torch.constant.int 64 | |
%int9223372036854775807_207 = torch.constant.int 9223372036854775807 | |
%int1_208 = torch.constant.int 1 | |
%726 = torch.aten.slice.Tensor %721, %int3_205, %int64_206, %int9223372036854775807_207, %int1_208 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %726, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%727 = torch.aten.neg %726 : !torch.vtensor<[1,8,?,64],bf16> -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %727, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%728 = torch.prim.ListConstruct %727, %725 : (!torch.vtensor<[1,8,?,64],bf16>, !torch.vtensor<[1,8,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_209 = torch.constant.int -1 | |
%729 = torch.aten.cat %728, %int-1_209 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %729, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%730 = torch.aten.mul.Tensor %729, %723 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %730, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_210 = torch.constant.int 1 | |
%731 = torch.aten.add.Tensor %724, %730, %int1_210 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,8,?,128],bf16>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %731, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_211 = torch.constant.int 1 | |
%int2_212 = torch.constant.int 2 | |
%732 = torch.aten.transpose.int %731, %int1_211, %int2_212 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %732, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
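// Quantize the rotated K and the V activations for the KV cache: divide by the cache quantizer
// scale, clamp to +/-240 (the f8E4M3FNUZ maximum), and convert to f8.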
%733 = torch.aten.div.Tensor %732, %8 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %733, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_213 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_214 = torch.constant.float 2.400000e+02 | |
%734 = torch.aten.clamp %733, %float-2.400000e02_213, %float2.400000e02_214 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %734, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_215 = torch.constant.int 26 | |
%735 = torch.prims.convert_element_type %734, %int26_215 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %735, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
%736 = torch.aten.div.Tensor %596, %8 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %736, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_216 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_217 = torch.constant.float 2.400000e+02 | |
%737 = torch.aten.clamp %736, %float-2.400000e02_216, %float2.400000e02_217 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %737, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_218 = torch.constant.int 26 | |
%738 = torch.prims.convert_element_type %737, %int26_218 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %738, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
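// Paged KV-cache update: the flat cache appears to be laid out as
// [page, 32 transformer blocks, 2 (K/V), 32 tokens, 8 KV heads, 128]; it is flattened to rows of
// [32, 8, 128] and index_put writes K at row page_id * 64 (block 0, K slot) and V at the next
// row, before the [pages, 2097152] view is restored.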
%int0_219 = torch.constant.int 0 | |
%739 = torch.aten.size.int %547, %int0_219 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int32_220 = torch.constant.int 32 | |
%int2_221 = torch.constant.int 2 | |
%int32_222 = torch.constant.int 32 | |
%int8_223 = torch.constant.int 8 | |
%int128_224 = torch.constant.int 128 | |
%740 = torch.prim.ListConstruct %739, %int32_220, %int2_221, %int32_222, %int8_223, %int128_224 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%741 = torch.aten.view %547, %740 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %741, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_225 = torch.constant.int 32 | |
%742 = torch.aten.mul.int %739, %int32_225 : !torch.int, !torch.int -> !torch.int | |
%int2_226 = torch.constant.int 2 | |
%743 = torch.aten.mul.int %742, %int2_226 : !torch.int, !torch.int -> !torch.int | |
%int32_227 = torch.constant.int 32 | |
%int8_228 = torch.constant.int 8 | |
%int128_229 = torch.constant.int 128 | |
%744 = torch.prim.ListConstruct %743, %int32_227, %int8_228, %int128_229 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%745 = torch.aten.view %741, %744 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %745, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int64_230 = torch.constant.int 64 | |
%746 = torch.aten.mul.Scalar %arg2, %int64_230 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %746, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int0_231 = torch.constant.int 0 | |
%int1_232 = torch.constant.int 1 | |
%747 = torch.aten.add.Scalar %746, %int0_231, %int1_232 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %747, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int1_233 = torch.constant.int 1 | |
%748 = torch.aten.size.int %arg2, %int1_233 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.int | |
%int1_234 = torch.constant.int 1 | |
%int32_235 = torch.constant.int 32 | |
%int8_236 = torch.constant.int 8 | |
%int128_237 = torch.constant.int 128 | |
%749 = torch.prim.ListConstruct %int1_234, %748, %int32_235, %int8_236, %int128_237 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%750 = torch.aten.view %735, %749 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %750, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_238 = torch.constant.int 32 | |
%int8_239 = torch.constant.int 8 | |
%int128_240 = torch.constant.int 128 | |
%751 = torch.prim.ListConstruct %748, %int32_238, %int8_239, %int128_240 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%752 = torch.aten.view %750, %751 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %752, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%753 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%754 = torch.aten.view %747, %753 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %754, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%755 = torch.prim.ListConstruct %754 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_241 = torch.constant.bool false | |
%756 = torch.aten.index_put %745, %755, %752, %false_241 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %756, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_242 = torch.constant.int 32 | |
%int2_243 = torch.constant.int 2 | |
%int32_244 = torch.constant.int 32 | |
%int8_245 = torch.constant.int 8 | |
%int128_246 = torch.constant.int 128 | |
%757 = torch.prim.ListConstruct %739, %int32_242, %int2_243, %int32_244, %int8_245, %int128_246 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%758 = torch.aten.view %756, %757 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %758, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152 = torch.constant.int 2097152 | |
%759 = torch.prim.ListConstruct %739, %int2097152 : (!torch.int, !torch.int) -> !torch.list<int> | |
%760 = torch.aten.view %758, %759 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %760, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
%int32_247 = torch.constant.int 32 | |
%int2_248 = torch.constant.int 2 | |
%int32_249 = torch.constant.int 32 | |
%int8_250 = torch.constant.int 8 | |
%int128_251 = torch.constant.int 128 | |
%761 = torch.prim.ListConstruct %739, %int32_247, %int2_248, %int32_249, %int8_250, %int128_251 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%762 = torch.aten.view %760, %761 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %762, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_252 = torch.constant.int 32 | |
%int8_253 = torch.constant.int 8 | |
%int128_254 = torch.constant.int 128 | |
%763 = torch.prim.ListConstruct %743, %int32_252, %int8_253, %int128_254 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%764 = torch.aten.view %762, %763 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %764, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_255 = torch.constant.int 1 | |
%int32_256 = torch.constant.int 32 | |
%int8_257 = torch.constant.int 8 | |
%int128_258 = torch.constant.int 128 | |
%765 = torch.prim.ListConstruct %int1_255, %748, %int32_256, %int8_257, %int128_258 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%766 = torch.aten.view %738, %765 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %766, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_259 = torch.constant.int 32 | |
%int8_260 = torch.constant.int 8 | |
%int128_261 = torch.constant.int 128 | |
%767 = torch.prim.ListConstruct %748, %int32_259, %int8_260, %int128_261 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%768 = torch.aten.view %766, %767 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %768, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_262 = torch.constant.int 1 | |
%int1_263 = torch.constant.int 1 | |
%769 = torch.aten.add.Scalar %747, %int1_262, %int1_263 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %769, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%770 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%771 = torch.aten.view %769, %770 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %771, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%772 = torch.prim.ListConstruct %771 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_264 = torch.constant.bool false | |
%773 = torch.aten.index_put %764, %772, %768, %false_264 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %773, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_265 = torch.constant.int 32 | |
%int2_266 = torch.constant.int 2 | |
%int32_267 = torch.constant.int 32 | |
%int8_268 = torch.constant.int 8 | |
%int128_269 = torch.constant.int 128 | |
%774 = torch.prim.ListConstruct %739, %int32_265, %int2_266, %int32_267, %int8_268, %int128_269 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%775 = torch.aten.view %773, %774 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %775, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_270 = torch.constant.int 2097152 | |
%776 = torch.prim.ListConstruct %739, %int2097152_270 : (!torch.int, !torch.int) -> !torch.list<int> | |
%777 = torch.aten.view %775, %776 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %777, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
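// Grouped-query attention expansion: repeat each of the 8 KV heads 4x so K and V match the
// 32 query heads.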
%int-2_271 = torch.constant.int -2 | |
%778 = torch.aten.unsqueeze %735, %int-2_271 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %778, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_272 = torch.constant.int 1 | |
%int8_273 = torch.constant.int 8 | |
%int4_274 = torch.constant.int 4 | |
%int128_275 = torch.constant.int 128 | |
%779 = torch.prim.ListConstruct %int1_272, %702, %int8_273, %int4_274, %int128_275 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_276 = torch.constant.bool false | |
%780 = torch.aten.expand %778, %779, %false_276 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %780, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_277 = torch.constant.int 0 | |
%781 = torch.aten.clone %780, %int0_277 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %781, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_278 = torch.constant.int 1 | |
%int32_279 = torch.constant.int 32 | |
%int128_280 = torch.constant.int 128 | |
%782 = torch.prim.ListConstruct %int1_278, %702, %int32_279, %int128_280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%783 = torch.aten._unsafe_view %781, %782 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %783, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
%int-2_281 = torch.constant.int -2 | |
%784 = torch.aten.unsqueeze %738, %int-2_281 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %784, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_282 = torch.constant.int 1 | |
%785 = torch.aten.size.int %589, %int1_282 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int1_283 = torch.constant.int 1 | |
%int8_284 = torch.constant.int 8 | |
%int4_285 = torch.constant.int 4 | |
%int128_286 = torch.constant.int 128 | |
%786 = torch.prim.ListConstruct %int1_283, %785, %int8_284, %int4_285, %int128_286 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_287 = torch.constant.bool false | |
%787 = torch.aten.expand %784, %786, %false_287 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %787, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_288 = torch.constant.int 0 | |
%788 = torch.aten.clone %787, %int0_288 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %788, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_289 = torch.constant.int 1 | |
%int32_290 = torch.constant.int 32 | |
%int128_291 = torch.constant.int 128 | |
%789 = torch.prim.ListConstruct %int1_289, %785, %int32_290, %int128_291 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%790 = torch.aten._unsafe_view %788, %789 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %790, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
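// Dequantize the expanded K and V back to bf16 by multiplying with the cache quantizer scale.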
%int6_292 = torch.constant.int 6 | |
%791 = torch.prims.convert_element_type %783, %int6_292 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %791, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%792 = torch.aten.mul.Tensor %791, %8 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %792, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_293 = torch.constant.int 15 | |
%793 = torch.prims.convert_element_type %792, %int15_293 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %793, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int6_294 = torch.constant.int 6 | |
%794 = torch.prims.convert_element_type %790, %int6_294 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %794, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%795 = torch.aten.mul.Tensor %794, %8 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %795, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_295 = torch.constant.int 15 | |
%796 = torch.prims.convert_element_type %795, %int15_295 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %796, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
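// Causal scaled dot-product attention over the 32 heads (CPU flash-attention kernel,
// dropout 0.0, is_causal = true).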
%int1_296 = torch.constant.int 1 | |
%int2_297 = torch.constant.int 2 | |
%797 = torch.aten.transpose.int %664, %int1_296, %int2_297 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %797, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_298 = torch.constant.int 1 | |
%int2_299 = torch.constant.int 2 | |
%798 = torch.aten.transpose.int %793, %int1_298, %int2_299 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %798, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_300 = torch.constant.int 1 | |
%int2_301 = torch.constant.int 2 | |
%799 = torch.aten.transpose.int %796, %int1_300, %int2_301 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %799, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%float0.000000e00 = torch.constant.float 0.000000e+00 | |
%true_302 = torch.constant.bool true | |
%none_303 = torch.constant.none | |
%none_304 = torch.constant.none | |
%800:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%797, %798, %799, %float0.000000e00, %true_302, %none_303, %none_304) : (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?],f32>) | |
torch.bind_symbolic_shape %800#0, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
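// Attention output projection: transpose back to [1, seq, 32, 128], flatten to [1, seq, 4096],
// quantize to f8E4M3FNUZ, matmul with the attn_output weight, and convert the result to bf16.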
%int1_305 = torch.constant.int 1 | |
%int2_306 = torch.constant.int 2 | |
%801 = torch.aten.transpose.int %800#0, %int1_305, %int2_306 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %801, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_307 = torch.constant.int 1 | |
%int4096_308 = torch.constant.int 4096 | |
%802 = torch.prim.ListConstruct %int1_307, %634, %int4096_308 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%803 = torch.aten.view %801, %802 : !torch.vtensor<[1,?,32,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %803, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%804 = torch.aten.div.Tensor %803, %9 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %804, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_309 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_310 = torch.constant.float 2.400000e+02 | |
%805 = torch.aten.clamp %804, %float-2.400000e02_309, %float2.400000e02_310 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %805, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_311 = torch.constant.int 26 | |
%806 = torch.prims.convert_element_type %805, %int26_311 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %806, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_312 = torch.constant.int -2 | |
%int-1_313 = torch.constant.int -1 | |
%807 = torch.aten.transpose.int %10, %int-2_312, %int-1_313 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_314 = torch.constant.int 4096 | |
%808 = torch.prim.ListConstruct %634, %int4096_314 : (!torch.int, !torch.int) -> !torch.list<int> | |
%809 = torch.aten.view %806, %808 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %809, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%810 = torch.aten.mm %809, %807 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %810, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_315 = torch.constant.int 1 | |
%int4096_316 = torch.constant.int 4096 | |
%811 = torch.prim.ListConstruct %int1_315, %634, %int4096_316 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%812 = torch.aten.view %810, %811 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %812, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_317 = torch.constant.int 15 | |
%813 = torch.prims.convert_element_type %812, %int15_317 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %813, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_318 = torch.constant.int 1 | |
%814 = torch.aten.add.Tensor %550, %813, %int1_318 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %814, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
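// RMSNorm of the updated residual: square in f32, mean over the last dim, add eps = 1e-5, rsqrt, rescale,
// cast back to bf16, and multiply by the learned 4096-wide norm weight.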
%int6_319 = torch.constant.int 6 | |
%815 = torch.prims.convert_element_type %814, %int6_319 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %815, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_320 = torch.constant.int 2 | |
%816 = torch.aten.pow.Tensor_Scalar %815, %int2_320 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %816, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_321 = torch.constant.int -1 | |
%817 = torch.prim.ListConstruct %int-1_321 : (!torch.int) -> !torch.list<int> | |
%true_322 = torch.constant.bool true | |
%none_323 = torch.constant.none | |
%818 = torch.aten.mean.dim %816, %817, %true_322, %none_323 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %818, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_324 = torch.constant.float 1.000000e-05 | |
%int1_325 = torch.constant.int 1 | |
%819 = torch.aten.add.Scalar %818, %float1.000000e-05_324, %int1_325 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %819, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%820 = torch.aten.rsqrt %819 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %820, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%821 = torch.aten.mul.Tensor %815, %820 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %821, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_326 = torch.constant.int 15 | |
%822 = torch.prims.convert_element_type %821, %int15_326 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %822, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%823 = torch.aten.mul.Tensor %11, %822 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %823, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
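// FFN gate branch: quantize the normalized activation to f8E4M3FNUZ (scale, clamp to ±240, cast), matmul with
// the transposed [14336,4096] gate weight, reshape to [1, seq, 14336], cast to bf16, and apply SiLU.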
%824 = torch.aten.div.Tensor %823, %12 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %824, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_327 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_328 = torch.constant.float 2.400000e+02 | |
%825 = torch.aten.clamp %824, %float-2.400000e02_327, %float2.400000e02_328 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %825, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_329 = torch.constant.int 26 | |
%826 = torch.prims.convert_element_type %825, %int26_329 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %826, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_330 = torch.constant.int -2 | |
%int-1_331 = torch.constant.int -1 | |
%827 = torch.aten.transpose.int %13, %int-2_330, %int-1_331 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_332 = torch.constant.int 4096 | |
%828 = torch.prim.ListConstruct %564, %int4096_332 : (!torch.int, !torch.int) -> !torch.list<int> | |
%829 = torch.aten.view %826, %828 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %829, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%830 = torch.aten.mm %829, %827 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %830, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_333 = torch.constant.int 1 | |
%int14336 = torch.constant.int 14336 | |
%831 = torch.prim.ListConstruct %int1_333, %564, %int14336 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%832 = torch.aten.view %830, %831 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %832, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_334 = torch.constant.int 15 | |
%833 = torch.prims.convert_element_type %832, %int15_334 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %833, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%834 = torch.aten.silu %833 : !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %834, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
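// FFN up branch: the same quantize/matmul pattern against a second [14336,4096] weight; the SiLU(gate)
// activations are then multiplied elementwise with this up projection.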
%835 = torch.aten.div.Tensor %823, %14 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %835, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_335 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_336 = torch.constant.float 2.400000e+02 | |
%836 = torch.aten.clamp %835, %float-2.400000e02_335, %float2.400000e02_336 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %836, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_337 = torch.constant.int 26 | |
%837 = torch.prims.convert_element_type %836, %int26_337 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %837, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_338 = torch.constant.int -2 | |
%int-1_339 = torch.constant.int -1 | |
%838 = torch.aten.transpose.int %15, %int-2_338, %int-1_339 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_340 = torch.constant.int 4096 | |
%839 = torch.prim.ListConstruct %564, %int4096_340 : (!torch.int, !torch.int) -> !torch.list<int> | |
%840 = torch.aten.view %837, %839 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %840, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%841 = torch.aten.mm %840, %838 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %841, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_341 = torch.constant.int 1 | |
%int14336_342 = torch.constant.int 14336 | |
%842 = torch.prim.ListConstruct %int1_341, %564, %int14336_342 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%843 = torch.aten.view %841, %842 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %843, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_343 = torch.constant.int 15 | |
%844 = torch.prims.convert_element_type %843, %int15_343 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %844, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%845 = torch.aten.mul.Tensor %834, %844 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %845, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
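// FFN down projection: quantize the gated product to f8, matmul with the transposed [4096,14336] weight back
// to width 4096, cast to bf16, and add onto the residual (the block's second residual connection).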
%846 = torch.aten.div.Tensor %845, %16 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %846, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%float-2.400000e02_344 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_345 = torch.constant.float 2.400000e+02 | |
%847 = torch.aten.clamp %846, %float-2.400000e02_344, %float2.400000e02_345 : !torch.vtensor<[1,?,14336],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %847, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%int26_346 = torch.constant.int 26 | |
%848 = torch.prims.convert_element_type %847, %int26_346 : !torch.vtensor<[1,?,14336],bf16>, !torch.int -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %848, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int-2_347 = torch.constant.int -2 | |
%int-1_348 = torch.constant.int -1 | |
%849 = torch.aten.transpose.int %17, %int-2_347, %int-1_348 : !torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%int1_349 = torch.constant.int 1 | |
%850 = torch.aten.size.int %832, %int1_349 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int14336_350 = torch.constant.int 14336 | |
%851 = torch.prim.ListConstruct %850, %int14336_350 : (!torch.int, !torch.int) -> !torch.list<int> | |
%852 = torch.aten.view %848, %851 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %852, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%853 = torch.aten.mm %852, %849 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.vtensor<[14336,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %853, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_351 = torch.constant.int 1 | |
%int4096_352 = torch.constant.int 4096 | |
%854 = torch.prim.ListConstruct %int1_351, %850, %int4096_352 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%855 = torch.aten.view %853, %854 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %855, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_353 = torch.constant.int 15 | |
%856 = torch.prims.convert_element_type %855, %int15_353 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %856, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_354 = torch.constant.int 1 | |
%857 = torch.aten.add.Tensor %814, %856, %int1_354 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %857, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
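// The norm followed by fresh Q/K/V projections below appears to begin the next transformer block's attention:
// RMSNorm of the residual with eps = 1e-5 and a learned 4096-wide scale.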
%int6_355 = torch.constant.int 6 | |
%858 = torch.prims.convert_element_type %857, %int6_355 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %858, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_356 = torch.constant.int 2 | |
%859 = torch.aten.pow.Tensor_Scalar %858, %int2_356 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %859, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_357 = torch.constant.int -1 | |
%860 = torch.prim.ListConstruct %int-1_357 : (!torch.int) -> !torch.list<int> | |
%true_358 = torch.constant.bool true | |
%none_359 = torch.constant.none | |
%861 = torch.aten.mean.dim %859, %860, %true_358, %none_359 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %861, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_360 = torch.constant.float 1.000000e-05 | |
%int1_361 = torch.constant.int 1 | |
%862 = torch.aten.add.Scalar %861, %float1.000000e-05_360, %int1_361 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %862, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%863 = torch.aten.rsqrt %862 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %863, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%864 = torch.aten.mul.Tensor %858, %863 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %864, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_362 = torch.constant.int 15 | |
%865 = torch.prims.convert_element_type %864, %int15_362 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %865, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%866 = torch.aten.mul.Tensor %18, %865 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %866, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
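// Q/K/V projections in f8: the normalized activation is scaled, clamped to ±240, cast to f8E4M3FNUZ, then
// matmul'd against transposed [4096,4096] (Q) and [1024,4096] (K and V) weights and cast back to bf16.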
%867 = torch.aten.div.Tensor %866, %19 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %867, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_363 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_364 = torch.constant.float 2.400000e+02 | |
%868 = torch.aten.clamp %867, %float-2.400000e02_363, %float2.400000e02_364 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %868, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_365 = torch.constant.int 26 | |
%869 = torch.prims.convert_element_type %868, %int26_365 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %869, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_366 = torch.constant.int -2 | |
%int-1_367 = torch.constant.int -1 | |
%870 = torch.aten.transpose.int %20, %int-2_366, %int-1_367 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_368 = torch.constant.int 4096 | |
%871 = torch.prim.ListConstruct %564, %int4096_368 : (!torch.int, !torch.int) -> !torch.list<int> | |
%872 = torch.aten.view %869, %871 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %872, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%873 = torch.aten.mm %872, %870 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %873, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_369 = torch.constant.int 1 | |
%int4096_370 = torch.constant.int 4096 | |
%874 = torch.prim.ListConstruct %int1_369, %564, %int4096_370 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%875 = torch.aten.view %873, %874 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %875, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_371 = torch.constant.int 15 | |
%876 = torch.prims.convert_element_type %875, %int15_371 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %876, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%877 = torch.aten.div.Tensor %866, %21 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %877, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_372 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_373 = torch.constant.float 2.400000e+02 | |
%878 = torch.aten.clamp %877, %float-2.400000e02_372, %float2.400000e02_373 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %878, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_374 = torch.constant.int 26 | |
%879 = torch.prims.convert_element_type %878, %int26_374 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %879, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_375 = torch.constant.int -2 | |
%int-1_376 = torch.constant.int -1 | |
%880 = torch.aten.transpose.int %22, %int-2_375, %int-1_376 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_377 = torch.constant.int 4096 | |
%881 = torch.prim.ListConstruct %564, %int4096_377 : (!torch.int, !torch.int) -> !torch.list<int> | |
%882 = torch.aten.view %879, %881 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %882, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%883 = torch.aten.mm %882, %880 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %883, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_378 = torch.constant.int 1 | |
%int1024_379 = torch.constant.int 1024 | |
%884 = torch.prim.ListConstruct %int1_378, %564, %int1024_379 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%885 = torch.aten.view %883, %884 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %885, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_380 = torch.constant.int 15 | |
%886 = torch.prims.convert_element_type %885, %int15_380 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %886, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
%887 = torch.aten.div.Tensor %866, %23 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %887, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_381 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_382 = torch.constant.float 2.400000e+02 | |
%888 = torch.aten.clamp %887, %float-2.400000e02_381, %float2.400000e02_382 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %888, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_383 = torch.constant.int 26 | |
%889 = torch.prims.convert_element_type %888, %int26_383 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %889, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_384 = torch.constant.int -2 | |
%int-1_385 = torch.constant.int -1 | |
%890 = torch.aten.transpose.int %24, %int-2_384, %int-1_385 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_386 = torch.constant.int 4096 | |
%891 = torch.prim.ListConstruct %564, %int4096_386 : (!torch.int, !torch.int) -> !torch.list<int> | |
%892 = torch.aten.view %889, %891 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %892, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%893 = torch.aten.mm %892, %890 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %893, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_387 = torch.constant.int 1 | |
%int1024_388 = torch.constant.int 1024 | |
%894 = torch.prim.ListConstruct %int1_387, %564, %int1024_388 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%895 = torch.aten.view %893, %894 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %895, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_389 = torch.constant.int 15 | |
%896 = torch.prims.convert_element_type %895, %int15_389 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %896, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
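// Split the projections into heads: Q -> [1, seq, 32, 128], K and V -> [1, seq, 8, 128], i.e. 32 query heads
// sharing 8 KV heads in a grouped-query attention layout.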
%int1_390 = torch.constant.int 1 | |
%int32_391 = torch.constant.int 32 | |
%int128_392 = torch.constant.int 128 | |
%897 = torch.prim.ListConstruct %int1_390, %564, %int32_391, %int128_392 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%898 = torch.aten.view %876, %897 : !torch.vtensor<[1,?,4096],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %898, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_393 = torch.constant.int 1 | |
%int8_394 = torch.constant.int 8 | |
%int128_395 = torch.constant.int 128 | |
%899 = torch.prim.ListConstruct %int1_393, %564, %int8_394, %int128_395 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%900 = torch.aten.view %886, %899 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %900, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int1_396 = torch.constant.int 1 | |
%int8_397 = torch.constant.int 8 | |
%int128_398 = torch.constant.int 128 | |
%901 = torch.prim.ListConstruct %int1_396, %564, %int8_397, %int128_398 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%902 = torch.aten.view %896, %901 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %902, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
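// Build the rotary-embedding tables for positions 0..131071: inverse frequencies 500000^(-2i/128), rescaled per
// wavelength in a way consistent with llama3-style RoPE scaling (wavelengths above 8192 divided by 8, below 2048
// left unchanged, a smooth blend in between), then cos/sin of the position-frequency product, cast to bf16.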
%int131072_399 = torch.constant.int 131072 | |
%none_400 = torch.constant.none | |
%none_401 = torch.constant.none | |
%cpu_402 = torch.constant.device "cpu" | |
%false_403 = torch.constant.bool false | |
%903 = torch.aten.arange %int131072_399, %none_400, %none_401, %cpu_402, %false_403 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_404 = torch.constant.int 0 | |
%int128_405 = torch.constant.int 128 | |
%int2_406 = torch.constant.int 2 | |
%int4_407 = torch.constant.int 4 | |
%none_408 = torch.constant.none | |
%cpu_409 = torch.constant.device "cpu" | |
%false_410 = torch.constant.bool false | |
%904 = torch.aten.arange.start_step %int0_404, %int128_405, %int2_406, %int4_407, %none_408, %cpu_409, %false_410 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_411 = torch.constant.int 6 | |
%905 = torch.prims.convert_element_type %904, %int6_411 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_412 = torch.constant.int 128 | |
%906 = torch.aten.div.Scalar %905, %int128_412 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_413 = torch.constant.float 5.000000e+05 | |
%907 = torch.aten.pow.Scalar %float5.000000e05_413, %906 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%908 = torch.aten.reciprocal %907 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_414 = torch.constant.float 1.000000e+00 | |
%909 = torch.aten.mul.Scalar %908, %float1.000000e00_414 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%910 = torch.aten.reciprocal %909 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_415 = torch.constant.float 6.2831853071795862 | |
%911 = torch.aten.mul.Scalar %910, %float6.283190e00_415 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_416 = torch.constant.float 8.192000e+03 | |
%912 = torch.aten.gt.Scalar %911, %float8.192000e03_416 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_417 = torch.constant.int 8 | |
%913 = torch.aten.div.Scalar %909, %int8_417 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%914 = torch.aten.where.self %912, %913, %909 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%915 = torch.aten.reciprocal %911 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_418 = torch.constant.int 8192 | |
%916 = torch.aten.mul.Scalar %915, %int8192_418 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_419 = torch.constant.int 1 | |
%int1_420 = torch.constant.int 1 | |
%917 = torch.aten.sub.Scalar %916, %int1_419, %int1_420 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_421 = torch.constant.int 3 | |
%918 = torch.aten.div.Scalar %917, %int3_421 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_422 = torch.constant.int 1 | |
%int1_423 = torch.constant.int 1 | |
%919 = torch.aten.rsub.Scalar %918, %int1_422, %int1_423 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%920 = torch.aten.mul.Tensor %919, %914 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_424 = torch.constant.int 8 | |
%921 = torch.aten.div.Scalar %920, %int8_424 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%922 = torch.aten.mul.Tensor %918, %914 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_425 = torch.constant.int 1 | |
%923 = torch.aten.add.Tensor %921, %922, %int1_425 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_426 = torch.constant.float 2.048000e+03 | |
%924 = torch.aten.lt.Scalar %911, %float2.048000e03_426 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%925 = torch.aten.bitwise_not %924 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_427 = torch.constant.float 8.192000e+03 | |
%926 = torch.aten.gt.Scalar %911, %float8.192000e03_427 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%927 = torch.aten.bitwise_not %926 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%928 = torch.aten.mul.Tensor %925, %927 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%929 = torch.aten.where.self %928, %923, %914 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%930 = torch.prim.ListConstruct %929, %929 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_428 = torch.constant.int -1 | |
%931 = torch.aten.cat %930, %int-1_428 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_429 = torch.constant.int 6 | |
%932 = torch.prims.convert_element_type %903, %int6_429 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_430 = torch.constant.int 131072 | |
%int1_431 = torch.constant.int 1 | |
%933 = torch.prim.ListConstruct %int131072_430, %int1_431 : (!torch.int, !torch.int) -> !torch.list<int> | |
%934 = torch.aten.view %932, %933 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%935 = torch.aten.mul.Tensor %934, %931 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%936 = torch.aten.cos %935 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_432 = torch.constant.int 15 | |
%937 = torch.prims.convert_element_type %936, %int15_432 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%938 = torch.aten.sin %935 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_433 = torch.constant.int 15 | |
%939 = torch.prims.convert_element_type %938, %int15_433 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
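// Slice the cos/sin tables to the current sequence length and reshape them to [1, seq, 1, 128] for broadcasting.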
%int1_434 = torch.constant.int 1 | |
%940 = torch.aten.size.int %875, %int1_434 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_435 = torch.constant.int 0 | |
%941 = torch.aten.add.int %int0_435, %940 : !torch.int, !torch.int -> !torch.int | |
%int0_436 = torch.constant.int 0 | |
%int0_437 = torch.constant.int 0 | |
%int1_438 = torch.constant.int 1 | |
%942 = torch.aten.slice.Tensor %937, %int0_436, %int0_437, %941, %int1_438 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %942, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_439 = torch.constant.int 1 | |
%int0_440 = torch.constant.int 0 | |
%int9223372036854775807_441 = torch.constant.int 9223372036854775807 | |
%int1_442 = torch.constant.int 1 | |
%943 = torch.aten.slice.Tensor %942, %int1_439, %int0_440, %int9223372036854775807_441, %int1_442 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %943, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_443 = torch.constant.int 0 | |
%944 = torch.aten.add.int %int0_443, %940 : !torch.int, !torch.int -> !torch.int | |
%int0_444 = torch.constant.int 0 | |
%int0_445 = torch.constant.int 0 | |
%int1_446 = torch.constant.int 1 | |
%945 = torch.aten.slice.Tensor %939, %int0_444, %int0_445, %944, %int1_446 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %945, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_447 = torch.constant.int 1 | |
%int0_448 = torch.constant.int 0 | |
%int9223372036854775807_449 = torch.constant.int 9223372036854775807 | |
%int1_450 = torch.constant.int 1 | |
%946 = torch.aten.slice.Tensor %945, %int1_447, %int0_448, %int9223372036854775807_449, %int1_450 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %946, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_451 = torch.constant.int 0 | |
%947 = torch.aten.unsqueeze %943, %int0_451 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %947, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_452 = torch.constant.int 1 | |
%int0_453 = torch.constant.int 0 | |
%int9223372036854775807_454 = torch.constant.int 9223372036854775807 | |
%int1_455 = torch.constant.int 1 | |
%948 = torch.aten.slice.Tensor %947, %int1_452, %int0_453, %int9223372036854775807_454, %int1_455 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %948, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_456 = torch.constant.int 2 | |
%949 = torch.aten.unsqueeze %948, %int2_456 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %949, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_457 = torch.constant.int 3 | |
%int0_458 = torch.constant.int 0 | |
%int9223372036854775807_459 = torch.constant.int 9223372036854775807 | |
%int1_460 = torch.constant.int 1 | |
%950 = torch.aten.slice.Tensor %949, %int3_457, %int0_458, %int9223372036854775807_459, %int1_460 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %950, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_461 = torch.constant.int 0 | |
%951 = torch.aten.unsqueeze %946, %int0_461 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %951, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_462 = torch.constant.int 1 | |
%int0_463 = torch.constant.int 0 | |
%int9223372036854775807_464 = torch.constant.int 9223372036854775807 | |
%int1_465 = torch.constant.int 1 | |
%952 = torch.aten.slice.Tensor %951, %int1_462, %int0_463, %int9223372036854775807_464, %int1_465 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %952, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_466 = torch.constant.int 2 | |
%953 = torch.aten.unsqueeze %952, %int2_466 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %953, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_467 = torch.constant.int 3 | |
%int0_468 = torch.constant.int 0 | |
%int9223372036854775807_469 = torch.constant.int 9223372036854775807 | |
%int1_470 = torch.constant.int 1 | |
%954 = torch.aten.slice.Tensor %953, %int3_467, %int0_468, %int9223372036854775807_469, %int1_470 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %954, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
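// Apply the rotary embedding to Q: broadcast cos/sin to [1, 1, seq, 128] (the repeat factors are all 1, so the
// repeat is a no-op), form rotate_half (negate features 64..127 and concatenate them before 0..63), and combine
// as q*cos + rotate_half(q)*sin in the [1, 32, seq, 128] layout before transposing back.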
%int1_471 = torch.constant.int 1 | |
%int2_472 = torch.constant.int 2 | |
%955 = torch.aten.transpose.int %950, %int1_471, %int2_472 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %955, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_473 = torch.constant.int 1 | |
%int1_474 = torch.constant.int 1 | |
%int1_475 = torch.constant.int 1 | |
%int1_476 = torch.constant.int 1 | |
%956 = torch.prim.ListConstruct %int1_473, %int1_474, %int1_475, %int1_476 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%957 = torch.aten.repeat %955, %956 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %957, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_477 = torch.constant.int 1 | |
%int2_478 = torch.constant.int 2 | |
%958 = torch.aten.transpose.int %954, %int1_477, %int2_478 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %958, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_479 = torch.constant.int 1 | |
%int2_480 = torch.constant.int 2 | |
%959 = torch.aten.transpose.int %898, %int1_479, %int2_480 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %959, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_481 = torch.constant.int 1 | |
%int1_482 = torch.constant.int 1 | |
%int1_483 = torch.constant.int 1 | |
%int1_484 = torch.constant.int 1 | |
%960 = torch.prim.ListConstruct %int1_481, %int1_482, %int1_483, %int1_484 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%961 = torch.aten.repeat %958, %960 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %961, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%962 = torch.aten.mul.Tensor %959, %957 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %962, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int3_485 = torch.constant.int 3 | |
%int0_486 = torch.constant.int 0 | |
%int64_487 = torch.constant.int 64 | |
%int1_488 = torch.constant.int 1 | |
%963 = torch.aten.slice.Tensor %959, %int3_485, %int0_486, %int64_487, %int1_488 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %963, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%int3_489 = torch.constant.int 3 | |
%int64_490 = torch.constant.int 64 | |
%int9223372036854775807_491 = torch.constant.int 9223372036854775807 | |
%int1_492 = torch.constant.int 1 | |
%964 = torch.aten.slice.Tensor %959, %int3_489, %int64_490, %int9223372036854775807_491, %int1_492 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %964, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%965 = torch.aten.neg %964 : !torch.vtensor<[1,32,?,64],bf16> -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %965, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%966 = torch.prim.ListConstruct %965, %963 : (!torch.vtensor<[1,32,?,64],bf16>, !torch.vtensor<[1,32,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_493 = torch.constant.int -1 | |
%967 = torch.aten.cat %966, %int-1_493 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %967, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%968 = torch.aten.mul.Tensor %967, %961 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %968, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_494 = torch.constant.int 1 | |
%969 = torch.aten.add.Tensor %962, %968, %int1_494 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %969, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_495 = torch.constant.int 1 | |
%int2_496 = torch.constant.int 2 | |
%970 = torch.aten.transpose.int %969, %int1_495, %int2_496 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %970, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
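// The position/frequency table computation above is repeated verbatim for the K path; the cos/sin tables are
// rebuilt from scratch rather than reused.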
%int131072_497 = torch.constant.int 131072 | |
%none_498 = torch.constant.none | |
%none_499 = torch.constant.none | |
%cpu_500 = torch.constant.device "cpu" | |
%false_501 = torch.constant.bool false | |
%971 = torch.aten.arange %int131072_497, %none_498, %none_499, %cpu_500, %false_501 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_502 = torch.constant.int 0 | |
%int128_503 = torch.constant.int 128 | |
%int2_504 = torch.constant.int 2 | |
%int4_505 = torch.constant.int 4 | |
%none_506 = torch.constant.none | |
%cpu_507 = torch.constant.device "cpu" | |
%false_508 = torch.constant.bool false | |
%972 = torch.aten.arange.start_step %int0_502, %int128_503, %int2_504, %int4_505, %none_506, %cpu_507, %false_508 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_509 = torch.constant.int 6 | |
%973 = torch.prims.convert_element_type %972, %int6_509 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_510 = torch.constant.int 128 | |
%974 = torch.aten.div.Scalar %973, %int128_510 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_511 = torch.constant.float 5.000000e+05 | |
%975 = torch.aten.pow.Scalar %float5.000000e05_511, %974 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%976 = torch.aten.reciprocal %975 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_512 = torch.constant.float 1.000000e+00 | |
%977 = torch.aten.mul.Scalar %976, %float1.000000e00_512 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%978 = torch.aten.reciprocal %977 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_513 = torch.constant.float 6.2831853071795862 | |
%979 = torch.aten.mul.Scalar %978, %float6.283190e00_513 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_514 = torch.constant.float 8.192000e+03 | |
%980 = torch.aten.gt.Scalar %979, %float8.192000e03_514 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_515 = torch.constant.int 8 | |
%981 = torch.aten.div.Scalar %977, %int8_515 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%982 = torch.aten.where.self %980, %981, %977 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%983 = torch.aten.reciprocal %979 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_516 = torch.constant.int 8192 | |
%984 = torch.aten.mul.Scalar %983, %int8192_516 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_517 = torch.constant.int 1 | |
%int1_518 = torch.constant.int 1 | |
%985 = torch.aten.sub.Scalar %984, %int1_517, %int1_518 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_519 = torch.constant.int 3 | |
%986 = torch.aten.div.Scalar %985, %int3_519 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_520 = torch.constant.int 1 | |
%int1_521 = torch.constant.int 1 | |
%987 = torch.aten.rsub.Scalar %986, %int1_520, %int1_521 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%988 = torch.aten.mul.Tensor %987, %982 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_522 = torch.constant.int 8 | |
%989 = torch.aten.div.Scalar %988, %int8_522 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%990 = torch.aten.mul.Tensor %986, %982 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_523 = torch.constant.int 1 | |
%991 = torch.aten.add.Tensor %989, %990, %int1_523 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_524 = torch.constant.float 2.048000e+03 | |
%992 = torch.aten.lt.Scalar %979, %float2.048000e03_524 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%993 = torch.aten.bitwise_not %992 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_525 = torch.constant.float 8.192000e+03 | |
%994 = torch.aten.gt.Scalar %979, %float8.192000e03_525 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%995 = torch.aten.bitwise_not %994 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%996 = torch.aten.mul.Tensor %993, %995 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%997 = torch.aten.where.self %996, %991, %982 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%998 = torch.prim.ListConstruct %997, %997 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_526 = torch.constant.int -1 | |
%999 = torch.aten.cat %998, %int-1_526 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_527 = torch.constant.int 6 | |
%1000 = torch.prims.convert_element_type %971, %int6_527 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_528 = torch.constant.int 131072 | |
%int1_529 = torch.constant.int 1 | |
%1001 = torch.prim.ListConstruct %int131072_528, %int1_529 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1002 = torch.aten.view %1000, %1001 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%1003 = torch.aten.mul.Tensor %1002, %999 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%1004 = torch.aten.cos %1003 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_530 = torch.constant.int 15 | |
%1005 = torch.prims.convert_element_type %1004, %int15_530 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%1006 = torch.aten.sin %1003 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_531 = torch.constant.int 15 | |
%1007 = torch.prims.convert_element_type %1006, %int15_531 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
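// Slice the recomputed cos/sin tables to the K sequence length and apply the same rotation to K in its
// [1, 8, seq, 128] layout, then transpose back to [1, seq, 8, 128].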
%int1_532 = torch.constant.int 1 | |
%1008 = torch.aten.size.int %885, %int1_532 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_533 = torch.constant.int 0 | |
%1009 = torch.aten.add.int %int0_533, %1008 : !torch.int, !torch.int -> !torch.int | |
%int0_534 = torch.constant.int 0 | |
%int0_535 = torch.constant.int 0 | |
%int1_536 = torch.constant.int 1 | |
%1010 = torch.aten.slice.Tensor %1005, %int0_534, %int0_535, %1009, %int1_536 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1010, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_537 = torch.constant.int 1 | |
%int0_538 = torch.constant.int 0 | |
%int9223372036854775807_539 = torch.constant.int 9223372036854775807 | |
%int1_540 = torch.constant.int 1 | |
%1011 = torch.aten.slice.Tensor %1010, %int1_537, %int0_538, %int9223372036854775807_539, %int1_540 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1011, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_541 = torch.constant.int 0 | |
%1012 = torch.aten.add.int %int0_541, %1008 : !torch.int, !torch.int -> !torch.int | |
%int0_542 = torch.constant.int 0 | |
%int0_543 = torch.constant.int 0 | |
%int1_544 = torch.constant.int 1 | |
%1013 = torch.aten.slice.Tensor %1007, %int0_542, %int0_543, %1012, %int1_544 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1013, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_545 = torch.constant.int 1 | |
%int0_546 = torch.constant.int 0 | |
%int9223372036854775807_547 = torch.constant.int 9223372036854775807 | |
%int1_548 = torch.constant.int 1 | |
%1014 = torch.aten.slice.Tensor %1013, %int1_545, %int0_546, %int9223372036854775807_547, %int1_548 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1014, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_549 = torch.constant.int 0 | |
%1015 = torch.aten.unsqueeze %1011, %int0_549 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1015, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_550 = torch.constant.int 1 | |
%int0_551 = torch.constant.int 0 | |
%int9223372036854775807_552 = torch.constant.int 9223372036854775807 | |
%int1_553 = torch.constant.int 1 | |
%1016 = torch.aten.slice.Tensor %1015, %int1_550, %int0_551, %int9223372036854775807_552, %int1_553 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1016, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_554 = torch.constant.int 2 | |
%1017 = torch.aten.unsqueeze %1016, %int2_554 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1017, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_555 = torch.constant.int 3 | |
%int0_556 = torch.constant.int 0 | |
%int9223372036854775807_557 = torch.constant.int 9223372036854775807 | |
%int1_558 = torch.constant.int 1 | |
%1018 = torch.aten.slice.Tensor %1017, %int3_555, %int0_556, %int9223372036854775807_557, %int1_558 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1018, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_559 = torch.constant.int 0 | |
%1019 = torch.aten.unsqueeze %1014, %int0_559 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1019, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_560 = torch.constant.int 1 | |
%int0_561 = torch.constant.int 0 | |
%int9223372036854775807_562 = torch.constant.int 9223372036854775807 | |
%int1_563 = torch.constant.int 1 | |
%1020 = torch.aten.slice.Tensor %1019, %int1_560, %int0_561, %int9223372036854775807_562, %int1_563 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1020, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_564 = torch.constant.int 2 | |
%1021 = torch.aten.unsqueeze %1020, %int2_564 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1021, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_565 = torch.constant.int 3 | |
%int0_566 = torch.constant.int 0 | |
%int9223372036854775807_567 = torch.constant.int 9223372036854775807 | |
%int1_568 = torch.constant.int 1 | |
%1022 = torch.aten.slice.Tensor %1021, %int3_565, %int0_566, %int9223372036854775807_567, %int1_568 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1022, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_569 = torch.constant.int 1 | |
%int2_570 = torch.constant.int 2 | |
%1023 = torch.aten.transpose.int %1018, %int1_569, %int2_570 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1023, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_571 = torch.constant.int 1 | |
%int1_572 = torch.constant.int 1 | |
%int1_573 = torch.constant.int 1 | |
%int1_574 = torch.constant.int 1 | |
%1024 = torch.prim.ListConstruct %int1_571, %int1_572, %int1_573, %int1_574 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1025 = torch.aten.repeat %1023, %1024 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1025, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_575 = torch.constant.int 1 | |
%int2_576 = torch.constant.int 2 | |
%1026 = torch.aten.transpose.int %1022, %int1_575, %int2_576 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1026, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_577 = torch.constant.int 1 | |
%int2_578 = torch.constant.int 2 | |
%1027 = torch.aten.transpose.int %900, %int1_577, %int2_578 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1027, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_579 = torch.constant.int 1 | |
%int1_580 = torch.constant.int 1 | |
%int1_581 = torch.constant.int 1 | |
%int1_582 = torch.constant.int 1 | |
%1028 = torch.prim.ListConstruct %int1_579, %int1_580, %int1_581, %int1_582 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1029 = torch.aten.repeat %1026, %1028 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1029, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%1030 = torch.aten.mul.Tensor %1027, %1025 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1030, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int3_583 = torch.constant.int 3 | |
%int0_584 = torch.constant.int 0 | |
%int64_585 = torch.constant.int 64 | |
%int1_586 = torch.constant.int 1 | |
%1031 = torch.aten.slice.Tensor %1027, %int3_583, %int0_584, %int64_585, %int1_586 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1031, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%int3_587 = torch.constant.int 3 | |
%int64_588 = torch.constant.int 64 | |
%int9223372036854775807_589 = torch.constant.int 9223372036854775807 | |
%int1_590 = torch.constant.int 1 | |
%1032 = torch.aten.slice.Tensor %1027, %int3_587, %int64_588, %int9223372036854775807_589, %int1_590 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1032, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%1033 = torch.aten.neg %1032 : !torch.vtensor<[1,8,?,64],bf16> -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1033, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%1034 = torch.prim.ListConstruct %1033, %1031 : (!torch.vtensor<[1,8,?,64],bf16>, !torch.vtensor<[1,8,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_591 = torch.constant.int -1 | |
%1035 = torch.aten.cat %1034, %int-1_591 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1035, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%1036 = torch.aten.mul.Tensor %1035, %1029 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1036, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_592 = torch.constant.int 1 | |
%1037 = torch.aten.add.Tensor %1030, %1036, %int1_592 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,8,?,128],bf16>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1037, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
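// Transpose the rotated key back to [1, seq, 8, 128] and quantize it for the KV cache:
// divide by the cache scale %25, clamp to the f8E4M3FNUZ range [-240, 240], and cast (dtype code 26).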
%int1_593 = torch.constant.int 1 | |
%int2_594 = torch.constant.int 2 | |
%1038 = torch.aten.transpose.int %1037, %int1_593, %int2_594 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1038, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%1039 = torch.aten.div.Tensor %1038, %25 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1039, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_595 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_596 = torch.constant.float 2.400000e+02 | |
%1040 = torch.aten.clamp %1039, %float-2.400000e02_595, %float2.400000e02_596 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1040, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_597 = torch.constant.int 26 | |
%1041 = torch.prims.convert_element_type %1040, %int26_597 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1041, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
%1042 = torch.aten.div.Tensor %902, %25 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1042, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_598 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_599 = torch.constant.float 2.400000e+02 | |
%1043 = torch.aten.clamp %1042, %float-2.400000e02_598, %float2.400000e02_599 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1043, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_600 = torch.constant.int 26 | |
%1044 = torch.prims.convert_element_type %1043, %int26_600 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1044, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
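// The value heads %902 are quantized the same way. Next, compute write rows into the paged cache:
// page ids (%arg2) * 64 rows per page plus a fixed offset (2 here, 3 below for V), which appears
// to select this layer's K and V slots within each page.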
%int64_601 = torch.constant.int 64 | |
%1045 = torch.aten.mul.Scalar %arg2, %int64_601 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1045, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int2_602 = torch.constant.int 2 | |
%int1_603 = torch.constant.int 1 | |
%1046 = torch.aten.add.Scalar %1045, %int2_602, %int1_603 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1046, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
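// Scatter the quantized K into the cache: reshape to one row per page slot, view the flat cache
// %777 as [pages, 32, 2, 32, 8, 128], collapse the leading dims to rows, and index_put at %1052.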
%int1_604 = torch.constant.int 1 | |
%int32_605 = torch.constant.int 32 | |
%int8_606 = torch.constant.int 8 | |
%int128_607 = torch.constant.int 128 | |
%1047 = torch.prim.ListConstruct %int1_604, %748, %int32_605, %int8_606, %int128_607 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1048 = torch.aten.view %1041, %1047 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1048, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_608 = torch.constant.int 32 | |
%int8_609 = torch.constant.int 8 | |
%int128_610 = torch.constant.int 128 | |
%1049 = torch.prim.ListConstruct %748, %int32_608, %int8_609, %int128_610 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1050 = torch.aten.view %1048, %1049 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1050, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%1051 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%1052 = torch.aten.view %1046, %1051 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %1052, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%int32_611 = torch.constant.int 32 | |
%int2_612 = torch.constant.int 2 | |
%int32_613 = torch.constant.int 32 | |
%int8_614 = torch.constant.int 8 | |
%int128_615 = torch.constant.int 128 | |
%1053 = torch.prim.ListConstruct %739, %int32_611, %int2_612, %int32_613, %int8_614, %int128_615 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1054 = torch.aten.view %777, %1053 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1054, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_616 = torch.constant.int 32 | |
%1055 = torch.aten.mul.int %739, %int32_616 : !torch.int, !torch.int -> !torch.int | |
%int2_617 = torch.constant.int 2 | |
%1056 = torch.aten.mul.int %1055, %int2_617 : !torch.int, !torch.int -> !torch.int | |
%int32_618 = torch.constant.int 32 | |
%int8_619 = torch.constant.int 8 | |
%int128_620 = torch.constant.int 128 | |
%1057 = torch.prim.ListConstruct %1056, %int32_618, %int8_619, %int128_620 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1058 = torch.aten.view %1054, %1057 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1058, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%1059 = torch.prim.ListConstruct %1052 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_621 = torch.constant.bool false | |
%1060 = torch.aten.index_put %1058, %1059, %1050, %false_621 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1060, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_622 = torch.constant.int 32 | |
%int2_623 = torch.constant.int 2 | |
%int32_624 = torch.constant.int 32 | |
%int8_625 = torch.constant.int 8 | |
%int128_626 = torch.constant.int 128 | |
%1061 = torch.prim.ListConstruct %739, %int32_622, %int2_623, %int32_624, %int8_625, %int128_626 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1062 = torch.aten.view %1060, %1061 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1062, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_627 = torch.constant.int 2097152 | |
%1063 = torch.prim.ListConstruct %739, %int2097152_627 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1064 = torch.aten.view %1062, %1063 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1064, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
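// The cache is reshaped back to its flat [pages, 2097152] form, then the same view/scatter
// sequence writes the quantized V at row offset +1 (%1073), yielding the updated flat cache %1081.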
%int32_628 = torch.constant.int 32 | |
%int2_629 = torch.constant.int 2 | |
%int32_630 = torch.constant.int 32 | |
%int8_631 = torch.constant.int 8 | |
%int128_632 = torch.constant.int 128 | |
%1065 = torch.prim.ListConstruct %739, %int32_628, %int2_629, %int32_630, %int8_631, %int128_632 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1066 = torch.aten.view %1064, %1065 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1066, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_633 = torch.constant.int 32 | |
%int8_634 = torch.constant.int 8 | |
%int128_635 = torch.constant.int 128 | |
%1067 = torch.prim.ListConstruct %1056, %int32_633, %int8_634, %int128_635 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1068 = torch.aten.view %1066, %1067 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1068, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_636 = torch.constant.int 1 | |
%int32_637 = torch.constant.int 32 | |
%int8_638 = torch.constant.int 8 | |
%int128_639 = torch.constant.int 128 | |
%1069 = torch.prim.ListConstruct %int1_636, %748, %int32_637, %int8_638, %int128_639 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1070 = torch.aten.view %1044, %1069 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1070, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_640 = torch.constant.int 32 | |
%int8_641 = torch.constant.int 8 | |
%int128_642 = torch.constant.int 128 | |
%1071 = torch.prim.ListConstruct %748, %int32_640, %int8_641, %int128_642 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1072 = torch.aten.view %1070, %1071 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1072, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_643 = torch.constant.int 1 | |
%int1_644 = torch.constant.int 1 | |
%1073 = torch.aten.add.Scalar %1046, %int1_643, %int1_644 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1073, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%1074 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%1075 = torch.aten.view %1073, %1074 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %1075, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%1076 = torch.prim.ListConstruct %1075 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_645 = torch.constant.bool false | |
%1077 = torch.aten.index_put %1068, %1076, %1072, %false_645 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1077, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_646 = torch.constant.int 32 | |
%int2_647 = torch.constant.int 2 | |
%int32_648 = torch.constant.int 32 | |
%int8_649 = torch.constant.int 8 | |
%int128_650 = torch.constant.int 128 | |
%1078 = torch.prim.ListConstruct %739, %int32_646, %int2_647, %int32_648, %int8_649, %int128_650 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1079 = torch.aten.view %1077, %1078 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1079, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_651 = torch.constant.int 2097152 | |
%1080 = torch.prim.ListConstruct %739, %int2097152_651 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1081 = torch.aten.view %1079, %1080 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1081, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
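// Grouped-query attention head expansion: unsqueeze the 8 KV heads, expand by a factor of 4,
// clone, and view to [1, seq, 32, 128] so K and V match the 32 query heads.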
%int-2_652 = torch.constant.int -2 | |
%1082 = torch.aten.unsqueeze %1041, %int-2_652 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1082, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_653 = torch.constant.int 1 | |
%int8_654 = torch.constant.int 8 | |
%int4_655 = torch.constant.int 4 | |
%int128_656 = torch.constant.int 128 | |
%1083 = torch.prim.ListConstruct %int1_653, %1008, %int8_654, %int4_655, %int128_656 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_657 = torch.constant.bool false | |
%1084 = torch.aten.expand %1082, %1083, %false_657 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1084, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_658 = torch.constant.int 0 | |
%1085 = torch.aten.clone %1084, %int0_658 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1085, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_659 = torch.constant.int 1 | |
%int32_660 = torch.constant.int 32 | |
%int128_661 = torch.constant.int 128 | |
%1086 = torch.prim.ListConstruct %int1_659, %1008, %int32_660, %int128_661 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1087 = torch.aten._unsafe_view %1085, %1086 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1087, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
%int-2_662 = torch.constant.int -2 | |
%1088 = torch.aten.unsqueeze %1044, %int-2_662 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1088, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_663 = torch.constant.int 1 | |
%1089 = torch.aten.size.int %895, %int1_663 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int1_664 = torch.constant.int 1 | |
%int8_665 = torch.constant.int 8 | |
%int4_666 = torch.constant.int 4 | |
%int128_667 = torch.constant.int 128 | |
%1090 = torch.prim.ListConstruct %int1_664, %1089, %int8_665, %int4_666, %int128_667 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_668 = torch.constant.bool false | |
%1091 = torch.aten.expand %1088, %1090, %false_668 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1091, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_669 = torch.constant.int 0 | |
%1092 = torch.aten.clone %1091, %int0_669 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1092, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_670 = torch.constant.int 1 | |
%int32_671 = torch.constant.int 32 | |
%int128_672 = torch.constant.int 128 | |
%1093 = torch.prim.ListConstruct %int1_670, %1089, %int32_671, %int128_672 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1094 = torch.aten._unsafe_view %1092, %1093 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1094, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
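// Dequantize the expanded K and V for attention: cast f8E4M3FNUZ -> f32, multiply by the cache
// scale %25, and cast down to bf16.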
%int6_673 = torch.constant.int 6 | |
%1095 = torch.prims.convert_element_type %1087, %int6_673 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1095, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%1096 = torch.aten.mul.Tensor %1095, %25 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1096, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_674 = torch.constant.int 15 | |
%1097 = torch.prims.convert_element_type %1096, %int15_674 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1097, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int6_675 = torch.constant.int 6 | |
%1098 = torch.prims.convert_element_type %1094, %int6_675 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1098, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%1099 = torch.aten.mul.Tensor %1098, %25 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1099, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_676 = torch.constant.int 15 | |
%1100 = torch.prims.convert_element_type %1099, %int15_676 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1100, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
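// Transpose Q (%970), K, and V to [1, 32, seq, 128] and run causal scaled-dot-product attention
// (dropout 0.0, is_causal = true).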
%int1_677 = torch.constant.int 1 | |
%int2_678 = torch.constant.int 2 | |
%1101 = torch.aten.transpose.int %970, %int1_677, %int2_678 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1101, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_679 = torch.constant.int 1 | |
%int2_680 = torch.constant.int 2 | |
%1102 = torch.aten.transpose.int %1097, %int1_679, %int2_680 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1102, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_681 = torch.constant.int 1 | |
%int2_682 = torch.constant.int 2 | |
%1103 = torch.aten.transpose.int %1100, %int1_681, %int2_682 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1103, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%float0.000000e00_683 = torch.constant.float 0.000000e+00 | |
%true_684 = torch.constant.bool true | |
%none_685 = torch.constant.none | |
%none_686 = torch.constant.none | |
%1104:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%1101, %1102, %1103, %float0.000000e00_683, %true_684, %none_685, %none_686) : (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?],f32>) | |
torch.bind_symbolic_shape %1104#0, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
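// Attention output path: transpose back to [1, seq, 32, 128], flatten to [1, seq, 4096],
// quantize with the output scale %26 (clamp to +/-240, cast to f8), and matmul with the
// transposed attention-output weight %27 before dequantizing to bf16.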
%int1_687 = torch.constant.int 1 | |
%int2_688 = torch.constant.int 2 | |
%1105 = torch.aten.transpose.int %1104#0, %int1_687, %int2_688 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1105, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_689 = torch.constant.int 1 | |
%int4096_690 = torch.constant.int 4096 | |
%1106 = torch.prim.ListConstruct %int1_689, %940, %int4096_690 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1107 = torch.aten.view %1105, %1106 : !torch.vtensor<[1,?,32,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1107, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1108 = torch.aten.div.Tensor %1107, %26 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1108, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_691 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_692 = torch.constant.float 2.400000e+02 | |
%1109 = torch.aten.clamp %1108, %float-2.400000e02_691, %float2.400000e02_692 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1109, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_693 = torch.constant.int 26 | |
%1110 = torch.prims.convert_element_type %1109, %int26_693 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1110, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_694 = torch.constant.int -2 | |
%int-1_695 = torch.constant.int -1 | |
%1111 = torch.aten.transpose.int %27, %int-2_694, %int-1_695 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_696 = torch.constant.int 4096 | |
%1112 = torch.prim.ListConstruct %940, %int4096_696 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1113 = torch.aten.view %1110, %1112 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1113, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1114 = torch.aten.mm %1113, %1111 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1114, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_697 = torch.constant.int 1 | |
%int4096_698 = torch.constant.int 4096 | |
%1115 = torch.prim.ListConstruct %int1_697, %940, %int4096_698 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1116 = torch.aten.view %1114, %1115 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1116, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_699 = torch.constant.int 15 | |
%1117 = torch.prims.convert_element_type %1116, %int15_699 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1117, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
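// Residual connection: add the projected attention output to the block input %857.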
%int1_700 = torch.constant.int 1 | |
%1118 = torch.aten.add.Tensor %857, %1117, %int1_700 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1118, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
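// RMSNorm of the residual stream in f32: x * rsqrt(mean(x^2, dim=-1) + 1e-5), scaled by the
// [4096] norm weight %28 (presumably the ffn_norm weight).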
%int6_701 = torch.constant.int 6 | |
%1119 = torch.prims.convert_element_type %1118, %int6_701 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1119, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_702 = torch.constant.int 2 | |
%1120 = torch.aten.pow.Tensor_Scalar %1119, %int2_702 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1120, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_703 = torch.constant.int -1 | |
%1121 = torch.prim.ListConstruct %int-1_703 : (!torch.int) -> !torch.list<int> | |
%true_704 = torch.constant.bool true | |
%none_705 = torch.constant.none | |
%1122 = torch.aten.mean.dim %1120, %1121, %true_704, %none_705 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1122, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_706 = torch.constant.float 1.000000e-05 | |
%int1_707 = torch.constant.int 1 | |
%1123 = torch.aten.add.Scalar %1122, %float1.000000e-05_706, %int1_707 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1123, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1124 = torch.aten.rsqrt %1123 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1124, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1125 = torch.aten.mul.Tensor %1119, %1124 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1125, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_708 = torch.constant.int 15 | |
%1126 = torch.prims.convert_element_type %1125, %int15_708 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1126, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1127 = torch.aten.mul.Tensor %28, %1126 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1127, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
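// SwiGLU feed-forward, gate branch: quantize the normed activations (scale %29), matmul with the
// transposed gate weight %30 ([4096, 14336] after transpose), dequantize to bf16, and apply SiLU.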
%1128 = torch.aten.div.Tensor %1127, %29 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1128, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_709 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_710 = torch.constant.float 2.400000e+02 | |
%1129 = torch.aten.clamp %1128, %float-2.400000e02_709, %float2.400000e02_710 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1129, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_711 = torch.constant.int 26 | |
%1130 = torch.prims.convert_element_type %1129, %int26_711 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1130, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_712 = torch.constant.int -2 | |
%int-1_713 = torch.constant.int -1 | |
%1131 = torch.aten.transpose.int %30, %int-2_712, %int-1_713 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_714 = torch.constant.int 4096 | |
%1132 = torch.prim.ListConstruct %564, %int4096_714 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1133 = torch.aten.view %1130, %1132 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1133, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1134 = torch.aten.mm %1133, %1131 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1134, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_715 = torch.constant.int 1 | |
%int14336_716 = torch.constant.int 14336 | |
%1135 = torch.prim.ListConstruct %int1_715, %564, %int14336_716 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1136 = torch.aten.view %1134, %1135 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1136, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_717 = torch.constant.int 15 | |
%1137 = torch.prims.convert_element_type %1136, %int15_717 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1137, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%1138 = torch.aten.silu %1137 : !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1138, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
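// Up branch: the same quantize (scale %31) / matmul (weight %32) / dequantize path; its bf16
// result is multiplied elementwise with the SiLU-gated branch.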
%1139 = torch.aten.div.Tensor %1127, %31 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1139, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_718 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_719 = torch.constant.float 2.400000e+02 | |
%1140 = torch.aten.clamp %1139, %float-2.400000e02_718, %float2.400000e02_719 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1140, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_720 = torch.constant.int 26 | |
%1141 = torch.prims.convert_element_type %1140, %int26_720 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1141, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_721 = torch.constant.int -2 | |
%int-1_722 = torch.constant.int -1 | |
%1142 = torch.aten.transpose.int %32, %int-2_721, %int-1_722 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_723 = torch.constant.int 4096 | |
%1143 = torch.prim.ListConstruct %564, %int4096_723 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1144 = torch.aten.view %1141, %1143 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1144, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1145 = torch.aten.mm %1144, %1142 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1145, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_724 = torch.constant.int 1 | |
%int14336_725 = torch.constant.int 14336 | |
%1146 = torch.prim.ListConstruct %int1_724, %564, %int14336_725 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1147 = torch.aten.view %1145, %1146 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1147, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_726 = torch.constant.int 15 | |
%1148 = torch.prims.convert_element_type %1147, %int15_726 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1148, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%1149 = torch.aten.mul.Tensor %1138, %1148 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1149, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
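// Down projection: quantize the gated product (scale %33), matmul with the transposed down weight
// %34, dequantize to bf16, and add the residual %1118 to close out this block.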
%1150 = torch.aten.div.Tensor %1149, %33 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1150, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%float-2.400000e02_727 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_728 = torch.constant.float 2.400000e+02 | |
%1151 = torch.aten.clamp %1150, %float-2.400000e02_727, %float2.400000e02_728 : !torch.vtensor<[1,?,14336],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1151, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%int26_729 = torch.constant.int 26 | |
%1152 = torch.prims.convert_element_type %1151, %int26_729 : !torch.vtensor<[1,?,14336],bf16>, !torch.int -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1152, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int-2_730 = torch.constant.int -2 | |
%int-1_731 = torch.constant.int -1 | |
%1153 = torch.aten.transpose.int %34, %int-2_730, %int-1_731 : !torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%int1_732 = torch.constant.int 1 | |
%1154 = torch.aten.size.int %1136, %int1_732 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int14336_733 = torch.constant.int 14336 | |
%1155 = torch.prim.ListConstruct %1154, %int14336_733 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1156 = torch.aten.view %1152, %1155 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1156, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%1157 = torch.aten.mm %1156, %1153 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.vtensor<[14336,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1157, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_734 = torch.constant.int 1 | |
%int4096_735 = torch.constant.int 4096 | |
%1158 = torch.prim.ListConstruct %int1_734, %1154, %int4096_735 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1159 = torch.aten.view %1157, %1158 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1159, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_736 = torch.constant.int 15 | |
%1160 = torch.prims.convert_element_type %1159, %int15_736 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1160, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_737 = torch.constant.int 1 | |
%1161 = torch.aten.add.Tensor %1118, %1160, %int1_737 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1161, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
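// What appears to be the start of the next transformer block: RMSNorm of the residual stream
// with the attention-norm weight %35.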
%int6_738 = torch.constant.int 6 | |
%1162 = torch.prims.convert_element_type %1161, %int6_738 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1162, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_739 = torch.constant.int 2 | |
%1163 = torch.aten.pow.Tensor_Scalar %1162, %int2_739 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1163, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_740 = torch.constant.int -1 | |
%1164 = torch.prim.ListConstruct %int-1_740 : (!torch.int) -> !torch.list<int> | |
%true_741 = torch.constant.bool true | |
%none_742 = torch.constant.none | |
%1165 = torch.aten.mean.dim %1163, %1164, %true_741, %none_742 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1165, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_743 = torch.constant.float 1.000000e-05 | |
%int1_744 = torch.constant.int 1 | |
%1166 = torch.aten.add.Scalar %1165, %float1.000000e-05_743, %int1_744 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1166, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1167 = torch.aten.rsqrt %1166 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1167, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1168 = torch.aten.mul.Tensor %1162, %1167 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1168, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_745 = torch.constant.int 15 | |
%1169 = torch.prims.convert_element_type %1168, %int15_745 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1169, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1170 = torch.aten.mul.Tensor %35, %1169 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1170, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
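// Q/K/V projections: per-input quantization (scales %36, %38, %40), matmuls with the transposed
// weights %37 (4096x4096), %39 and %41 (1024x4096 each), and dequantization back to bf16.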
%1171 = torch.aten.div.Tensor %1170, %36 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1171, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_746 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_747 = torch.constant.float 2.400000e+02 | |
%1172 = torch.aten.clamp %1171, %float-2.400000e02_746, %float2.400000e02_747 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1172, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_748 = torch.constant.int 26 | |
%1173 = torch.prims.convert_element_type %1172, %int26_748 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1173, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_749 = torch.constant.int -2 | |
%int-1_750 = torch.constant.int -1 | |
%1174 = torch.aten.transpose.int %37, %int-2_749, %int-1_750 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_751 = torch.constant.int 4096 | |
%1175 = torch.prim.ListConstruct %564, %int4096_751 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1176 = torch.aten.view %1173, %1175 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1176, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1177 = torch.aten.mm %1176, %1174 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1177, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_752 = torch.constant.int 1 | |
%int4096_753 = torch.constant.int 4096 | |
%1178 = torch.prim.ListConstruct %int1_752, %564, %int4096_753 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1179 = torch.aten.view %1177, %1178 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1179, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_754 = torch.constant.int 15 | |
%1180 = torch.prims.convert_element_type %1179, %int15_754 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1180, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1181 = torch.aten.div.Tensor %1170, %38 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1181, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_755 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_756 = torch.constant.float 2.400000e+02 | |
%1182 = torch.aten.clamp %1181, %float-2.400000e02_755, %float2.400000e02_756 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1182, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_757 = torch.constant.int 26 | |
%1183 = torch.prims.convert_element_type %1182, %int26_757 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1183, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_758 = torch.constant.int -2 | |
%int-1_759 = torch.constant.int -1 | |
%1184 = torch.aten.transpose.int %39, %int-2_758, %int-1_759 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_760 = torch.constant.int 4096 | |
%1185 = torch.prim.ListConstruct %564, %int4096_760 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1186 = torch.aten.view %1183, %1185 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1186, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1187 = torch.aten.mm %1186, %1184 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1187, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_761 = torch.constant.int 1 | |
%int1024_762 = torch.constant.int 1024 | |
%1188 = torch.prim.ListConstruct %int1_761, %564, %int1024_762 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1189 = torch.aten.view %1187, %1188 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1189, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_763 = torch.constant.int 15 | |
%1190 = torch.prims.convert_element_type %1189, %int15_763 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %1190, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
%1191 = torch.aten.div.Tensor %1170, %40 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1191, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_764 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_765 = torch.constant.float 2.400000e+02 | |
%1192 = torch.aten.clamp %1191, %float-2.400000e02_764, %float2.400000e02_765 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1192, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_766 = torch.constant.int 26 | |
%1193 = torch.prims.convert_element_type %1192, %int26_766 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1193, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_767 = torch.constant.int -2 | |
%int-1_768 = torch.constant.int -1 | |
%1194 = torch.aten.transpose.int %41, %int-2_767, %int-1_768 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_769 = torch.constant.int 4096 | |
%1195 = torch.prim.ListConstruct %564, %int4096_769 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1196 = torch.aten.view %1193, %1195 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1196, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1197 = torch.aten.mm %1196, %1194 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1197, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_770 = torch.constant.int 1 | |
%int1024_771 = torch.constant.int 1024 | |
%1198 = torch.prim.ListConstruct %int1_770, %564, %int1024_771 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1199 = torch.aten.view %1197, %1198 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1199, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_772 = torch.constant.int 15 | |
%1200 = torch.prims.convert_element_type %1199, %int15_772 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %1200, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
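// Split the projections into heads: Q -> [1, seq, 32, 128], K and V -> [1, seq, 8, 128].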
%int1_773 = torch.constant.int 1 | |
%int32_774 = torch.constant.int 32 | |
%int128_775 = torch.constant.int 128 | |
%1201 = torch.prim.ListConstruct %int1_773, %564, %int32_774, %int128_775 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1202 = torch.aten.view %1180, %1201 : !torch.vtensor<[1,?,4096],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1202, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_776 = torch.constant.int 1 | |
%int8_777 = torch.constant.int 8 | |
%int128_778 = torch.constant.int 128 | |
%1203 = torch.prim.ListConstruct %int1_776, %564, %int8_777, %int128_778 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1204 = torch.aten.view %1190, %1203 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1204, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int1_779 = torch.constant.int 1 | |
%int8_780 = torch.constant.int 8 | |
%int128_781 = torch.constant.int 128 | |
%1205 = torch.prim.ListConstruct %int1_779, %564, %int8_780, %int128_781 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1206 = torch.aten.view %1200, %1205 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1206, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
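// Rebuild the rotary-embedding tables: positions 0..131071 and inverse frequencies
// 1 / 500000^(2i/128), followed by what looks like the Llama-3 long-context frequency rescaling
// (wavelength cutoffs at 8192 and 2048, scaling factor 8), then cos/sin tables cast to bf16.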
%int131072_782 = torch.constant.int 131072 | |
%none_783 = torch.constant.none | |
%none_784 = torch.constant.none | |
%cpu_785 = torch.constant.device "cpu" | |
%false_786 = torch.constant.bool false | |
%1207 = torch.aten.arange %int131072_782, %none_783, %none_784, %cpu_785, %false_786 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_787 = torch.constant.int 0 | |
%int128_788 = torch.constant.int 128 | |
%int2_789 = torch.constant.int 2 | |
%int4_790 = torch.constant.int 4 | |
%none_791 = torch.constant.none | |
%cpu_792 = torch.constant.device "cpu" | |
%false_793 = torch.constant.bool false | |
%1208 = torch.aten.arange.start_step %int0_787, %int128_788, %int2_789, %int4_790, %none_791, %cpu_792, %false_793 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_794 = torch.constant.int 6 | |
%1209 = torch.prims.convert_element_type %1208, %int6_794 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_795 = torch.constant.int 128 | |
%1210 = torch.aten.div.Scalar %1209, %int128_795 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_796 = torch.constant.float 5.000000e+05 | |
%1211 = torch.aten.pow.Scalar %float5.000000e05_796, %1210 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1212 = torch.aten.reciprocal %1211 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_797 = torch.constant.float 1.000000e+00 | |
%1213 = torch.aten.mul.Scalar %1212, %float1.000000e00_797 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%1214 = torch.aten.reciprocal %1213 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_798 = torch.constant.float 6.2831853071795862 | |
%1215 = torch.aten.mul.Scalar %1214, %float6.283190e00_798 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_799 = torch.constant.float 8.192000e+03 | |
%1216 = torch.aten.gt.Scalar %1215, %float8.192000e03_799 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_800 = torch.constant.int 8 | |
%1217 = torch.aten.div.Scalar %1213, %int8_800 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1218 = torch.aten.where.self %1216, %1217, %1213 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1219 = torch.aten.reciprocal %1215 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_801 = torch.constant.int 8192 | |
%1220 = torch.aten.mul.Scalar %1219, %int8192_801 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_802 = torch.constant.int 1 | |
%int1_803 = torch.constant.int 1 | |
%1221 = torch.aten.sub.Scalar %1220, %int1_802, %int1_803 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_804 = torch.constant.int 3 | |
%1222 = torch.aten.div.Scalar %1221, %int3_804 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_805 = torch.constant.int 1 | |
%int1_806 = torch.constant.int 1 | |
%1223 = torch.aten.rsub.Scalar %1222, %int1_805, %int1_806 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%1224 = torch.aten.mul.Tensor %1223, %1218 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_807 = torch.constant.int 8 | |
%1225 = torch.aten.div.Scalar %1224, %int8_807 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1226 = torch.aten.mul.Tensor %1222, %1218 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_808 = torch.constant.int 1 | |
%1227 = torch.aten.add.Tensor %1225, %1226, %int1_808 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_809 = torch.constant.float 2.048000e+03 | |
%1228 = torch.aten.lt.Scalar %1215, %float2.048000e03_809 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1229 = torch.aten.bitwise_not %1228 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_810 = torch.constant.float 8.192000e+03 | |
%1230 = torch.aten.gt.Scalar %1215, %float8.192000e03_810 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1231 = torch.aten.bitwise_not %1230 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1232 = torch.aten.mul.Tensor %1229, %1231 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1233 = torch.aten.where.self %1232, %1227, %1218 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1234 = torch.prim.ListConstruct %1233, %1233 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_811 = torch.constant.int -1 | |
%1235 = torch.aten.cat %1234, %int-1_811 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_812 = torch.constant.int 6 | |
%1236 = torch.prims.convert_element_type %1207, %int6_812 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_813 = torch.constant.int 131072 | |
%int1_814 = torch.constant.int 1 | |
%1237 = torch.prim.ListConstruct %int131072_813, %int1_814 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1238 = torch.aten.view %1236, %1237 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%1239 = torch.aten.mul.Tensor %1238, %1235 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%1240 = torch.aten.cos %1239 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_815 = torch.constant.int 15 | |
%1241 = torch.prims.convert_element_type %1240, %int15_815 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%1242 = torch.aten.sin %1239 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_816 = torch.constant.int 15 | |
%1243 = torch.prims.convert_element_type %1242, %int15_816 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
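// Slice the cos/sin tables to the current sequence length (%1244) and unsqueeze them to
// [1, seq, 1, 128] for broadcasting over the heads.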
%int1_817 = torch.constant.int 1 | |
%1244 = torch.aten.size.int %1179, %int1_817 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_818 = torch.constant.int 0 | |
%1245 = torch.aten.add.int %int0_818, %1244 : !torch.int, !torch.int -> !torch.int | |
%int0_819 = torch.constant.int 0 | |
%int0_820 = torch.constant.int 0 | |
%int1_821 = torch.constant.int 1 | |
%1246 = torch.aten.slice.Tensor %1241, %int0_819, %int0_820, %1245, %int1_821 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1246, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_822 = torch.constant.int 1 | |
%int0_823 = torch.constant.int 0 | |
%int9223372036854775807_824 = torch.constant.int 9223372036854775807 | |
%int1_825 = torch.constant.int 1 | |
%1247 = torch.aten.slice.Tensor %1246, %int1_822, %int0_823, %int9223372036854775807_824, %int1_825 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1247, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_826 = torch.constant.int 0 | |
%1248 = torch.aten.add.int %int0_826, %1244 : !torch.int, !torch.int -> !torch.int | |
%int0_827 = torch.constant.int 0 | |
%int0_828 = torch.constant.int 0 | |
%int1_829 = torch.constant.int 1 | |
%1249 = torch.aten.slice.Tensor %1243, %int0_827, %int0_828, %1248, %int1_829 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1249, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_830 = torch.constant.int 1 | |
%int0_831 = torch.constant.int 0 | |
%int9223372036854775807_832 = torch.constant.int 9223372036854775807 | |
%int1_833 = torch.constant.int 1 | |
%1250 = torch.aten.slice.Tensor %1249, %int1_830, %int0_831, %int9223372036854775807_832, %int1_833 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1250, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_834 = torch.constant.int 0 | |
%1251 = torch.aten.unsqueeze %1247, %int0_834 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1251, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_835 = torch.constant.int 1 | |
%int0_836 = torch.constant.int 0 | |
%int9223372036854775807_837 = torch.constant.int 9223372036854775807 | |
%int1_838 = torch.constant.int 1 | |
%1252 = torch.aten.slice.Tensor %1251, %int1_835, %int0_836, %int9223372036854775807_837, %int1_838 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1252, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_839 = torch.constant.int 2 | |
%1253 = torch.aten.unsqueeze %1252, %int2_839 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1253, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_840 = torch.constant.int 3 | |
%int0_841 = torch.constant.int 0 | |
%int9223372036854775807_842 = torch.constant.int 9223372036854775807 | |
%int1_843 = torch.constant.int 1 | |
%1254 = torch.aten.slice.Tensor %1253, %int3_840, %int0_841, %int9223372036854775807_842, %int1_843 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1254, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_844 = torch.constant.int 0 | |
%1255 = torch.aten.unsqueeze %1250, %int0_844 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1255, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_845 = torch.constant.int 1 | |
%int0_846 = torch.constant.int 0 | |
%int9223372036854775807_847 = torch.constant.int 9223372036854775807 | |
%int1_848 = torch.constant.int 1 | |
%1256 = torch.aten.slice.Tensor %1255, %int1_845, %int0_846, %int9223372036854775807_847, %int1_848 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1256, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_849 = torch.constant.int 2 | |
%1257 = torch.aten.unsqueeze %1256, %int2_849 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1257, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_850 = torch.constant.int 3 | |
%int0_851 = torch.constant.int 0 | |
%int9223372036854775807_852 = torch.constant.int 9223372036854775807 | |
%int1_853 = torch.constant.int 1 | |
%1258 = torch.aten.slice.Tensor %1257, %int3_850, %int0_851, %int9223372036854775807_852, %int1_853 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1258, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_854 = torch.constant.int 1 | |
%int2_855 = torch.constant.int 2 | |
%1259 = torch.aten.transpose.int %1254, %int1_854, %int2_855 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1259, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_856 = torch.constant.int 1 | |
%int1_857 = torch.constant.int 1 | |
%int1_858 = torch.constant.int 1 | |
%int1_859 = torch.constant.int 1 | |
%1260 = torch.prim.ListConstruct %int1_856, %int1_857, %int1_858, %int1_859 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1261 = torch.aten.repeat %1259, %1260 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1261, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_860 = torch.constant.int 1 | |
%int2_861 = torch.constant.int 2 | |
%1262 = torch.aten.transpose.int %1258, %int1_860, %int2_861 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1262, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
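// Note (added annotation): rotary embedding is applied to the query states %1263 (32 heads):
// q * cos + rotate_half(q) * sin, where rotate_half negates the upper 64 dims and swaps the halves;
// the result %1274 is transposed back to [1, seq, 32, 128].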
%int1_862 = torch.constant.int 1 | |
%int2_863 = torch.constant.int 2 | |
%1263 = torch.aten.transpose.int %1202, %int1_862, %int2_863 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1263, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_864 = torch.constant.int 1 | |
%int1_865 = torch.constant.int 1 | |
%int1_866 = torch.constant.int 1 | |
%int1_867 = torch.constant.int 1 | |
%1264 = torch.prim.ListConstruct %int1_864, %int1_865, %int1_866, %int1_867 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1265 = torch.aten.repeat %1262, %1264 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1265, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%1266 = torch.aten.mul.Tensor %1263, %1261 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1266, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int3_868 = torch.constant.int 3 | |
%int0_869 = torch.constant.int 0 | |
%int64_870 = torch.constant.int 64 | |
%int1_871 = torch.constant.int 1 | |
%1267 = torch.aten.slice.Tensor %1263, %int3_868, %int0_869, %int64_870, %int1_871 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %1267, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%int3_872 = torch.constant.int 3 | |
%int64_873 = torch.constant.int 64 | |
%int9223372036854775807_874 = torch.constant.int 9223372036854775807 | |
%int1_875 = torch.constant.int 1 | |
%1268 = torch.aten.slice.Tensor %1263, %int3_872, %int64_873, %int9223372036854775807_874, %int1_875 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %1268, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%1269 = torch.aten.neg %1268 : !torch.vtensor<[1,32,?,64],bf16> -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %1269, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%1270 = torch.prim.ListConstruct %1269, %1267 : (!torch.vtensor<[1,32,?,64],bf16>, !torch.vtensor<[1,32,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_876 = torch.constant.int -1 | |
%1271 = torch.aten.cat %1270, %int-1_876 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1271, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%1272 = torch.aten.mul.Tensor %1271, %1265 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1272, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_877 = torch.constant.int 1 | |
%1273 = torch.aten.add.Tensor %1266, %1272, %int1_877 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1273, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_878 = torch.constant.int 1 | |
%int2_879 = torch.constant.int 2 | |
%1274 = torch.aten.transpose.int %1273, %int1_878, %int2_879 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1274, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
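// Note (added annotation): the same rotary table construction (131072 positions, identical frequency scaling)
// is repeated from scratch below for the key path.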
%int131072_880 = torch.constant.int 131072 | |
%none_881 = torch.constant.none | |
%none_882 = torch.constant.none | |
%cpu_883 = torch.constant.device "cpu" | |
%false_884 = torch.constant.bool false | |
%1275 = torch.aten.arange %int131072_880, %none_881, %none_882, %cpu_883, %false_884 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_885 = torch.constant.int 0 | |
%int128_886 = torch.constant.int 128 | |
%int2_887 = torch.constant.int 2 | |
%int4_888 = torch.constant.int 4 | |
%none_889 = torch.constant.none | |
%cpu_890 = torch.constant.device "cpu" | |
%false_891 = torch.constant.bool false | |
%1276 = torch.aten.arange.start_step %int0_885, %int128_886, %int2_887, %int4_888, %none_889, %cpu_890, %false_891 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_892 = torch.constant.int 6 | |
%1277 = torch.prims.convert_element_type %1276, %int6_892 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_893 = torch.constant.int 128 | |
%1278 = torch.aten.div.Scalar %1277, %int128_893 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_894 = torch.constant.float 5.000000e+05 | |
%1279 = torch.aten.pow.Scalar %float5.000000e05_894, %1278 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1280 = torch.aten.reciprocal %1279 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_895 = torch.constant.float 1.000000e+00 | |
%1281 = torch.aten.mul.Scalar %1280, %float1.000000e00_895 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%1282 = torch.aten.reciprocal %1281 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_896 = torch.constant.float 6.2831853071795862 | |
%1283 = torch.aten.mul.Scalar %1282, %float6.283190e00_896 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_897 = torch.constant.float 8.192000e+03 | |
%1284 = torch.aten.gt.Scalar %1283, %float8.192000e03_897 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_898 = torch.constant.int 8 | |
%1285 = torch.aten.div.Scalar %1281, %int8_898 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1286 = torch.aten.where.self %1284, %1285, %1281 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1287 = torch.aten.reciprocal %1283 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_899 = torch.constant.int 8192 | |
%1288 = torch.aten.mul.Scalar %1287, %int8192_899 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_900 = torch.constant.int 1 | |
%int1_901 = torch.constant.int 1 | |
%1289 = torch.aten.sub.Scalar %1288, %int1_900, %int1_901 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_902 = torch.constant.int 3 | |
%1290 = torch.aten.div.Scalar %1289, %int3_902 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_903 = torch.constant.int 1 | |
%int1_904 = torch.constant.int 1 | |
%1291 = torch.aten.rsub.Scalar %1290, %int1_903, %int1_904 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%1292 = torch.aten.mul.Tensor %1291, %1286 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_905 = torch.constant.int 8 | |
%1293 = torch.aten.div.Scalar %1292, %int8_905 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1294 = torch.aten.mul.Tensor %1290, %1286 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_906 = torch.constant.int 1 | |
%1295 = torch.aten.add.Tensor %1293, %1294, %int1_906 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_907 = torch.constant.float 2.048000e+03 | |
%1296 = torch.aten.lt.Scalar %1283, %float2.048000e03_907 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1297 = torch.aten.bitwise_not %1296 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_908 = torch.constant.float 8.192000e+03 | |
%1298 = torch.aten.gt.Scalar %1283, %float8.192000e03_908 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1299 = torch.aten.bitwise_not %1298 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1300 = torch.aten.mul.Tensor %1297, %1299 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1301 = torch.aten.where.self %1300, %1295, %1286 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1302 = torch.prim.ListConstruct %1301, %1301 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_909 = torch.constant.int -1 | |
%1303 = torch.aten.cat %1302, %int-1_909 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_910 = torch.constant.int 6 | |
%1304 = torch.prims.convert_element_type %1275, %int6_910 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_911 = torch.constant.int 131072 | |
%int1_912 = torch.constant.int 1 | |
%1305 = torch.prim.ListConstruct %int131072_911, %int1_912 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1306 = torch.aten.view %1304, %1305 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%1307 = torch.aten.mul.Tensor %1306, %1303 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%1308 = torch.aten.cos %1307 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_913 = torch.constant.int 15 | |
%1309 = torch.prims.convert_element_type %1308, %int15_913 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%1310 = torch.aten.sin %1307 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_914 = torch.constant.int 15 | |
%1311 = torch.prims.convert_element_type %1310, %int15_914 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
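// Note (added annotation): slice these cos/sin tables to the key sequence length (taken from %1189)
// and broadcast to [1, 1, seq, 128], mirroring the query path above.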
%int1_915 = torch.constant.int 1 | |
%1312 = torch.aten.size.int %1189, %int1_915 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_916 = torch.constant.int 0 | |
%1313 = torch.aten.add.int %int0_916, %1312 : !torch.int, !torch.int -> !torch.int | |
%int0_917 = torch.constant.int 0 | |
%int0_918 = torch.constant.int 0 | |
%int1_919 = torch.constant.int 1 | |
%1314 = torch.aten.slice.Tensor %1309, %int0_917, %int0_918, %1313, %int1_919 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1314, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_920 = torch.constant.int 1 | |
%int0_921 = torch.constant.int 0 | |
%int9223372036854775807_922 = torch.constant.int 9223372036854775807 | |
%int1_923 = torch.constant.int 1 | |
%1315 = torch.aten.slice.Tensor %1314, %int1_920, %int0_921, %int9223372036854775807_922, %int1_923 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1315, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_924 = torch.constant.int 0 | |
%1316 = torch.aten.add.int %int0_924, %1312 : !torch.int, !torch.int -> !torch.int | |
%int0_925 = torch.constant.int 0 | |
%int0_926 = torch.constant.int 0 | |
%int1_927 = torch.constant.int 1 | |
%1317 = torch.aten.slice.Tensor %1311, %int0_925, %int0_926, %1316, %int1_927 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1317, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_928 = torch.constant.int 1 | |
%int0_929 = torch.constant.int 0 | |
%int9223372036854775807_930 = torch.constant.int 9223372036854775807 | |
%int1_931 = torch.constant.int 1 | |
%1318 = torch.aten.slice.Tensor %1317, %int1_928, %int0_929, %int9223372036854775807_930, %int1_931 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1318, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_932 = torch.constant.int 0 | |
%1319 = torch.aten.unsqueeze %1315, %int0_932 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1319, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_933 = torch.constant.int 1 | |
%int0_934 = torch.constant.int 0 | |
%int9223372036854775807_935 = torch.constant.int 9223372036854775807 | |
%int1_936 = torch.constant.int 1 | |
%1320 = torch.aten.slice.Tensor %1319, %int1_933, %int0_934, %int9223372036854775807_935, %int1_936 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1320, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_937 = torch.constant.int 2 | |
%1321 = torch.aten.unsqueeze %1320, %int2_937 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1321, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_938 = torch.constant.int 3 | |
%int0_939 = torch.constant.int 0 | |
%int9223372036854775807_940 = torch.constant.int 9223372036854775807 | |
%int1_941 = torch.constant.int 1 | |
%1322 = torch.aten.slice.Tensor %1321, %int3_938, %int0_939, %int9223372036854775807_940, %int1_941 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1322, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_942 = torch.constant.int 0 | |
%1323 = torch.aten.unsqueeze %1318, %int0_942 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1323, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_943 = torch.constant.int 1 | |
%int0_944 = torch.constant.int 0 | |
%int9223372036854775807_945 = torch.constant.int 9223372036854775807 | |
%int1_946 = torch.constant.int 1 | |
%1324 = torch.aten.slice.Tensor %1323, %int1_943, %int0_944, %int9223372036854775807_945, %int1_946 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1324, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_947 = torch.constant.int 2 | |
%1325 = torch.aten.unsqueeze %1324, %int2_947 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1325, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_948 = torch.constant.int 3 | |
%int0_949 = torch.constant.int 0 | |
%int9223372036854775807_950 = torch.constant.int 9223372036854775807 | |
%int1_951 = torch.constant.int 1 | |
%1326 = torch.aten.slice.Tensor %1325, %int3_948, %int0_949, %int9223372036854775807_950, %int1_951 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1326, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_952 = torch.constant.int 1 | |
%int2_953 = torch.constant.int 2 | |
%1327 = torch.aten.transpose.int %1322, %int1_952, %int2_953 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1327, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_954 = torch.constant.int 1 | |
%int1_955 = torch.constant.int 1 | |
%int1_956 = torch.constant.int 1 | |
%int1_957 = torch.constant.int 1 | |
%1328 = torch.prim.ListConstruct %int1_954, %int1_955, %int1_956, %int1_957 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1329 = torch.aten.repeat %1327, %1328 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1329, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_958 = torch.constant.int 1 | |
%int2_959 = torch.constant.int 2 | |
%1330 = torch.aten.transpose.int %1326, %int1_958, %int2_959 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1330, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
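// Note (added annotation): rotary embedding is applied to the key states %1331 (8 KV heads, grouped-query
// attention) with the same k * cos + rotate_half(k) * sin pattern, then transposed back to [1, seq, 8, 128].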
%int1_960 = torch.constant.int 1 | |
%int2_961 = torch.constant.int 2 | |
%1331 = torch.aten.transpose.int %1204, %int1_960, %int2_961 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1331, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_962 = torch.constant.int 1 | |
%int1_963 = torch.constant.int 1 | |
%int1_964 = torch.constant.int 1 | |
%int1_965 = torch.constant.int 1 | |
%1332 = torch.prim.ListConstruct %int1_962, %int1_963, %int1_964, %int1_965 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1333 = torch.aten.repeat %1330, %1332 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1333, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%1334 = torch.aten.mul.Tensor %1331, %1329 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1334, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int3_966 = torch.constant.int 3 | |
%int0_967 = torch.constant.int 0 | |
%int64_968 = torch.constant.int 64 | |
%int1_969 = torch.constant.int 1 | |
%1335 = torch.aten.slice.Tensor %1331, %int3_966, %int0_967, %int64_968, %int1_969 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1335, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%int3_970 = torch.constant.int 3 | |
%int64_971 = torch.constant.int 64 | |
%int9223372036854775807_972 = torch.constant.int 9223372036854775807 | |
%int1_973 = torch.constant.int 1 | |
%1336 = torch.aten.slice.Tensor %1331, %int3_970, %int64_971, %int9223372036854775807_972, %int1_973 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1336, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%1337 = torch.aten.neg %1336 : !torch.vtensor<[1,8,?,64],bf16> -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1337, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%1338 = torch.prim.ListConstruct %1337, %1335 : (!torch.vtensor<[1,8,?,64],bf16>, !torch.vtensor<[1,8,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_974 = torch.constant.int -1 | |
%1339 = torch.aten.cat %1338, %int-1_974 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1339, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%1340 = torch.aten.mul.Tensor %1339, %1333 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1340, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_975 = torch.constant.int 1 | |
%1341 = torch.aten.add.Tensor %1334, %1340, %int1_975 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,8,?,128],bf16>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1341, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_976 = torch.constant.int 1 | |
%int2_977 = torch.constant.int 2 | |
%1342 = torch.aten.transpose.int %1341, %int1_976, %int2_977 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1342, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
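// Note (added annotation): the rotated keys (%1343) and the value states (%1206) are quantized for the KV
// cache: divide by the shared kv_cache quantizer scale %42, clamp to +/-240 (the finite range of f8E4M3FNUZ),
// and cast to f8E4M3FNUZ.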
%1343 = torch.aten.div.Tensor %1342, %42 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1343, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_978 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_979 = torch.constant.float 2.400000e+02 | |
%1344 = torch.aten.clamp %1343, %float-2.400000e02_978, %float2.400000e02_979 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1344, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_980 = torch.constant.int 26 | |
%1345 = torch.prims.convert_element_type %1344, %int26_980 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1345, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
%1346 = torch.aten.div.Tensor %1206, %42 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1346, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_981 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_982 = torch.constant.float 2.400000e+02 | |
%1347 = torch.aten.clamp %1346, %float-2.400000e02_981, %float2.400000e02_982 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1347, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_983 = torch.constant.int 26 | |
%1348 = torch.prims.convert_element_type %1347, %int26_983 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1348, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
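// Note (added annotation): flat slot indices into the paged KV cache are built as %arg2 * 64 + 4
// (64 appears to be 32 transformer blocks x 2 K/V planes per page; the +4 offset appears to select this
// block's K plane), and the quantized keys are reshaped into per-page [32, 8, 128] tiles.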
%int64_984 = torch.constant.int 64 | |
%1349 = torch.aten.mul.Scalar %arg2, %int64_984 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1349, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int4_985 = torch.constant.int 4 | |
%int1_986 = torch.constant.int 1 | |
%1350 = torch.aten.add.Scalar %1349, %int4_985, %int1_986 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1350, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int1_987 = torch.constant.int 1 | |
%int32_988 = torch.constant.int 32 | |
%int8_989 = torch.constant.int 8 | |
%int128_990 = torch.constant.int 128 | |
%1351 = torch.prim.ListConstruct %int1_987, %748, %int32_988, %int8_989, %int128_990 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1352 = torch.aten.view %1345, %1351 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1352, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_991 = torch.constant.int 32 | |
%int8_992 = torch.constant.int 8 | |
%int128_993 = torch.constant.int 128 | |
%1353 = torch.prim.ListConstruct %748, %int32_991, %int8_992, %int128_993 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1354 = torch.aten.view %1352, %1353 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1354, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%1355 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%1356 = torch.aten.view %1350, %1355 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %1356, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%int32_994 = torch.constant.int 32 | |
%int2_995 = torch.constant.int 2 | |
%int32_996 = torch.constant.int 32 | |
%int8_997 = torch.constant.int 8 | |
%int128_998 = torch.constant.int 128 | |
%1357 = torch.prim.ListConstruct %739, %int32_994, %int2_995, %int32_996, %int8_997, %int128_998 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1358 = torch.aten.view %1081, %1357 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1358, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_999 = torch.constant.int 32 | |
%1359 = torch.aten.mul.int %739, %int32_999 : !torch.int, !torch.int -> !torch.int | |
%int2_1000 = torch.constant.int 2 | |
%1360 = torch.aten.mul.int %1359, %int2_1000 : !torch.int, !torch.int -> !torch.int | |
%int32_1001 = torch.constant.int 32 | |
%int8_1002 = torch.constant.int 8 | |
%int128_1003 = torch.constant.int 128 | |
%1361 = torch.prim.ListConstruct %1360, %int32_1001, %int8_1002, %int128_1003 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1362 = torch.aten.view %1358, %1361 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1362, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%1363 = torch.prim.ListConstruct %1356 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_1004 = torch.constant.bool false | |
%1364 = torch.aten.index_put %1362, %1363, %1354, %false_1004 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1364, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_1005 = torch.constant.int 32 | |
%int2_1006 = torch.constant.int 2 | |
%int32_1007 = torch.constant.int 32 | |
%int8_1008 = torch.constant.int 8 | |
%int128_1009 = torch.constant.int 128 | |
%1365 = torch.prim.ListConstruct %739, %int32_1005, %int2_1006, %int32_1007, %int8_1008, %int128_1009 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1366 = torch.aten.view %1364, %1365 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1366, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_1010 = torch.constant.int 2097152 | |
%1367 = torch.prim.ListConstruct %739, %int2097152_1010 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1368 = torch.aten.view %1366, %1367 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1368, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
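// Note (added annotation): the cache buffer %1081 was viewed as [pages, 32, 2, 32, 8, 128], flattened, and the
// K tiles scattered in via index_put; the same scatter is now repeated for the V tiles at slot index + 1.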
%int32_1011 = torch.constant.int 32 | |
%int2_1012 = torch.constant.int 2 | |
%int32_1013 = torch.constant.int 32 | |
%int8_1014 = torch.constant.int 8 | |
%int128_1015 = torch.constant.int 128 | |
%1369 = torch.prim.ListConstruct %739, %int32_1011, %int2_1012, %int32_1013, %int8_1014, %int128_1015 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1370 = torch.aten.view %1368, %1369 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1370, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_1016 = torch.constant.int 32 | |
%int8_1017 = torch.constant.int 8 | |
%int128_1018 = torch.constant.int 128 | |
%1371 = torch.prim.ListConstruct %1360, %int32_1016, %int8_1017, %int128_1018 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1372 = torch.aten.view %1370, %1371 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1372, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_1019 = torch.constant.int 1 | |
%int32_1020 = torch.constant.int 32 | |
%int8_1021 = torch.constant.int 8 | |
%int128_1022 = torch.constant.int 128 | |
%1373 = torch.prim.ListConstruct %int1_1019, %748, %int32_1020, %int8_1021, %int128_1022 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1374 = torch.aten.view %1348, %1373 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1374, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_1023 = torch.constant.int 32 | |
%int8_1024 = torch.constant.int 8 | |
%int128_1025 = torch.constant.int 128 | |
%1375 = torch.prim.ListConstruct %748, %int32_1023, %int8_1024, %int128_1025 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1376 = torch.aten.view %1374, %1375 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1376, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_1026 = torch.constant.int 1 | |
%int1_1027 = torch.constant.int 1 | |
%1377 = torch.aten.add.Scalar %1350, %int1_1026, %int1_1027 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1377, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%1378 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%1379 = torch.aten.view %1377, %1378 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %1379, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%1380 = torch.prim.ListConstruct %1379 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_1028 = torch.constant.bool false | |
%1381 = torch.aten.index_put %1372, %1380, %1376, %false_1028 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1381, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_1029 = torch.constant.int 32 | |
%int2_1030 = torch.constant.int 2 | |
%int32_1031 = torch.constant.int 32 | |
%int8_1032 = torch.constant.int 8 | |
%int128_1033 = torch.constant.int 128 | |
%1382 = torch.prim.ListConstruct %739, %int32_1029, %int2_1030, %int32_1031, %int8_1032, %int128_1033 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1383 = torch.aten.view %1381, %1382 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1383, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_1034 = torch.constant.int 2097152 | |
%1384 = torch.prim.ListConstruct %739, %int2097152_1034 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1385 = torch.aten.view %1383, %1384 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1385, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
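// Note (added annotation): grouped-query expansion — the 8 KV heads are broadcast to the 32 query heads
// (group size 4) by unsqueezing to [1, seq, 8, 1, 128], expanding to [1, seq, 8, 4, 128], and flattening to
// [1, seq, 32, 128] for both K and V.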
%int-2_1035 = torch.constant.int -2 | |
%1386 = torch.aten.unsqueeze %1345, %int-2_1035 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1386, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_1036 = torch.constant.int 1 | |
%int8_1037 = torch.constant.int 8 | |
%int4_1038 = torch.constant.int 4 | |
%int128_1039 = torch.constant.int 128 | |
%1387 = torch.prim.ListConstruct %int1_1036, %1312, %int8_1037, %int4_1038, %int128_1039 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_1040 = torch.constant.bool false | |
%1388 = torch.aten.expand %1386, %1387, %false_1040 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1388, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_1041 = torch.constant.int 0 | |
%1389 = torch.aten.clone %1388, %int0_1041 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1389, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_1042 = torch.constant.int 1 | |
%int32_1043 = torch.constant.int 32 | |
%int128_1044 = torch.constant.int 128 | |
%1390 = torch.prim.ListConstruct %int1_1042, %1312, %int32_1043, %int128_1044 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1391 = torch.aten._unsafe_view %1389, %1390 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1391, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
%int-2_1045 = torch.constant.int -2 | |
%1392 = torch.aten.unsqueeze %1348, %int-2_1045 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1392, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_1046 = torch.constant.int 1 | |
%1393 = torch.aten.size.int %1199, %int1_1046 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int1_1047 = torch.constant.int 1 | |
%int8_1048 = torch.constant.int 8 | |
%int4_1049 = torch.constant.int 4 | |
%int128_1050 = torch.constant.int 128 | |
%1394 = torch.prim.ListConstruct %int1_1047, %1393, %int8_1048, %int4_1049, %int128_1050 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_1051 = torch.constant.bool false | |
%1395 = torch.aten.expand %1392, %1394, %false_1051 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1395, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_1052 = torch.constant.int 0 | |
%1396 = torch.aten.clone %1395, %int0_1052 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1396, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_1053 = torch.constant.int 1 | |
%int32_1054 = torch.constant.int 32 | |
%int128_1055 = torch.constant.int 128 | |
%1397 = torch.prim.ListConstruct %int1_1053, %1393, %int32_1054, %int128_1055 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1398 = torch.aten._unsafe_view %1396, %1397 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1398, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
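// Note (added annotation): the expanded K/V are dequantized back to bf16 (cast to f32, multiply by the kv
// scale %42, cast to bf16) before attention.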
%int6_1056 = torch.constant.int 6 | |
%1399 = torch.prims.convert_element_type %1391, %int6_1056 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1399, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%1400 = torch.aten.mul.Tensor %1399, %42 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1400, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_1057 = torch.constant.int 15 | |
%1401 = torch.prims.convert_element_type %1400, %int15_1057 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1401, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int6_1058 = torch.constant.int 6 | |
%1402 = torch.prims.convert_element_type %1398, %int6_1058 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1402, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%1403 = torch.aten.mul.Tensor %1402, %42 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1403, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_1059 = torch.constant.int 15 | |
%1404 = torch.prims.convert_element_type %1403, %int15_1059 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1404, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
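// Note (added annotation): Q (%1274), K, and V are transposed to [1, 32, seq, 128] and fed to the CPU
// flash-attention kernel with dropout 0.0 and is_causal = true.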
%int1_1060 = torch.constant.int 1 | |
%int2_1061 = torch.constant.int 2 | |
%1405 = torch.aten.transpose.int %1274, %int1_1060, %int2_1061 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1405, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1062 = torch.constant.int 1 | |
%int2_1063 = torch.constant.int 2 | |
%1406 = torch.aten.transpose.int %1401, %int1_1062, %int2_1063 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1406, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1064 = torch.constant.int 1 | |
%int2_1065 = torch.constant.int 2 | |
%1407 = torch.aten.transpose.int %1404, %int1_1064, %int2_1065 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1407, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%float0.000000e00_1066 = torch.constant.float 0.000000e+00 | |
%true_1067 = torch.constant.bool true | |
%none_1068 = torch.constant.none | |
%none_1069 = torch.constant.none | |
%1408:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%1405, %1406, %1407, %float0.000000e00_1066, %true_1067, %none_1068, %none_1069) : (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?],f32>) | |
torch.bind_symbolic_shape %1408#0, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
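// Note (added annotation): the attention output is transposed back to [1, seq, 32, 128], flattened to
// [1, seq, 4096], and re-quantized to f8E4M3FNUZ for the output projection (input scale %43, +/-240 clamp).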
%int1_1070 = torch.constant.int 1 | |
%int2_1071 = torch.constant.int 2 | |
%1409 = torch.aten.transpose.int %1408#0, %int1_1070, %int2_1071 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1409, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_1072 = torch.constant.int 1 | |
%int4096_1073 = torch.constant.int 4096 | |
%1410 = torch.prim.ListConstruct %int1_1072, %1244, %int4096_1073 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1411 = torch.aten.view %1409, %1410 : !torch.vtensor<[1,?,32,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1411, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1412 = torch.aten.div.Tensor %1411, %43 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1412, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1074 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1075 = torch.constant.float 2.400000e+02 | |
%1413 = torch.aten.clamp %1412, %float-2.400000e02_1074, %float2.400000e02_1075 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1413, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1076 = torch.constant.int 26 | |
%1414 = torch.prims.convert_element_type %1413, %int26_1076 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1414, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
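// Note (added annotation): fp8 matmul with the transposed attn_output weight of this block (%44), reshape to
// [1, seq, 4096], cast to bf16, and add the residual %1161.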
%int-2_1077 = torch.constant.int -2 | |
%int-1_1078 = torch.constant.int -1 | |
%1415 = torch.aten.transpose.int %44, %int-2_1077, %int-1_1078 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_1079 = torch.constant.int 4096 | |
%1416 = torch.prim.ListConstruct %1244, %int4096_1079 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1417 = torch.aten.view %1414, %1416 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1417, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1418 = torch.aten.mm %1417, %1415 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1418, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_1080 = torch.constant.int 1 | |
%int4096_1081 = torch.constant.int 4096 | |
%1419 = torch.prim.ListConstruct %int1_1080, %1244, %int4096_1081 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1420 = torch.aten.view %1418, %1419 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1420, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_1082 = torch.constant.int 15 | |
%1421 = torch.prims.convert_element_type %1420, %int15_1082 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1421, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_1083 = torch.constant.int 1 | |
%1422 = torch.aten.add.Tensor %1161, %1421, %int1_1083 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1422, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
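// Note (added annotation): RMSNorm over the post-attention residual — mean of squares over the hidden dim,
// + 1e-5, rsqrt, scale, cast to bf16, then multiply by the ffn_norm weight %45.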
%int6_1084 = torch.constant.int 6 | |
%1423 = torch.prims.convert_element_type %1422, %int6_1084 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1423, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_1085 = torch.constant.int 2 | |
%1424 = torch.aten.pow.Tensor_Scalar %1423, %int2_1085 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1424, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_1086 = torch.constant.int -1 | |
%1425 = torch.prim.ListConstruct %int-1_1086 : (!torch.int) -> !torch.list<int> | |
%true_1087 = torch.constant.bool true | |
%none_1088 = torch.constant.none | |
%1426 = torch.aten.mean.dim %1424, %1425, %true_1087, %none_1088 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1426, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_1089 = torch.constant.float 1.000000e-05 | |
%int1_1090 = torch.constant.int 1 | |
%1427 = torch.aten.add.Scalar %1426, %float1.000000e-05_1089, %int1_1090 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1427, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1428 = torch.aten.rsqrt %1427 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1428, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1429 = torch.aten.mul.Tensor %1423, %1428 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1429, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_1091 = torch.constant.int 15 | |
%1430 = torch.prims.convert_element_type %1429, %int15_1091 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1430, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1431 = torch.aten.mul.Tensor %45, %1430 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1431, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
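// Note (added annotation): SwiGLU feed-forward, gate branch — quantize the normed activations (scale %46,
// +/-240 clamp), fp8 matmul with the transposed ffn_gate weight %47 (4096 -> 14336), cast to bf16, apply silu.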
%1432 = torch.aten.div.Tensor %1431, %46 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1432, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1092 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1093 = torch.constant.float 2.400000e+02 | |
%1433 = torch.aten.clamp %1432, %float-2.400000e02_1092, %float2.400000e02_1093 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1433, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1094 = torch.constant.int 26 | |
%1434 = torch.prims.convert_element_type %1433, %int26_1094 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1434, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1095 = torch.constant.int -2 | |
%int-1_1096 = torch.constant.int -1 | |
%1435 = torch.aten.transpose.int %47, %int-2_1095, %int-1_1096 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_1097 = torch.constant.int 4096 | |
%1436 = torch.prim.ListConstruct %564, %int4096_1097 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1437 = torch.aten.view %1434, %1436 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1437, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1438 = torch.aten.mm %1437, %1435 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1438, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_1098 = torch.constant.int 1 | |
%int14336_1099 = torch.constant.int 14336 | |
%1439 = torch.prim.ListConstruct %int1_1098, %564, %int14336_1099 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1440 = torch.aten.view %1438, %1439 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1440, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_1100 = torch.constant.int 15 | |
%1441 = torch.prims.convert_element_type %1440, %int15_1100 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1441, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%1442 = torch.aten.silu %1441 : !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1442, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
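// Note (added annotation): up branch — the same normed activations %1431 are re-quantized with their own
// scale %48, multiplied by the ffn_up weight %49, cast to bf16, and multiplied elementwise with the
// silu-activated gate.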
%1443 = torch.aten.div.Tensor %1431, %48 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1443, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1101 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1102 = torch.constant.float 2.400000e+02 | |
%1444 = torch.aten.clamp %1443, %float-2.400000e02_1101, %float2.400000e02_1102 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1444, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1103 = torch.constant.int 26 | |
%1445 = torch.prims.convert_element_type %1444, %int26_1103 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1445, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1104 = torch.constant.int -2 | |
%int-1_1105 = torch.constant.int -1 | |
%1446 = torch.aten.transpose.int %49, %int-2_1104, %int-1_1105 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_1106 = torch.constant.int 4096 | |
%1447 = torch.prim.ListConstruct %564, %int4096_1106 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1448 = torch.aten.view %1445, %1447 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1448, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1449 = torch.aten.mm %1448, %1446 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1449, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_1107 = torch.constant.int 1 | |
%int14336_1108 = torch.constant.int 14336 | |
%1450 = torch.prim.ListConstruct %int1_1107, %564, %int14336_1108 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1451 = torch.aten.view %1449, %1450 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1451, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_1109 = torch.constant.int 15 | |
%1452 = torch.prims.convert_element_type %1451, %int15_1109 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1452, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%1453 = torch.aten.mul.Tensor %1442, %1452 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1453, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
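// SwiGLU combine above (silu(gate) * up); below, the FFN down projection repeats the quantize -> f8 matmul -> bf16 dequantize pattern with the down weight (14336 -> 4096).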
%1454 = torch.aten.div.Tensor %1453, %50 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1454, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%float-2.400000e02_1110 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1111 = torch.constant.float 2.400000e+02 | |
%1455 = torch.aten.clamp %1454, %float-2.400000e02_1110, %float2.400000e02_1111 : !torch.vtensor<[1,?,14336],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1455, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%int26_1112 = torch.constant.int 26 | |
%1456 = torch.prims.convert_element_type %1455, %int26_1112 : !torch.vtensor<[1,?,14336],bf16>, !torch.int -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1456, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int-2_1113 = torch.constant.int -2 | |
%int-1_1114 = torch.constant.int -1 | |
%1457 = torch.aten.transpose.int %51, %int-2_1113, %int-1_1114 : !torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%int1_1115 = torch.constant.int 1 | |
%1458 = torch.aten.size.int %1440, %int1_1115 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int14336_1116 = torch.constant.int 14336 | |
%1459 = torch.prim.ListConstruct %1458, %int14336_1116 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1460 = torch.aten.view %1456, %1459 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1460, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%1461 = torch.aten.mm %1460, %1457 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.vtensor<[14336,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1461, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_1117 = torch.constant.int 1 | |
%int4096_1118 = torch.constant.int 4096 | |
%1462 = torch.prim.ListConstruct %int1_1117, %1458, %int4096_1118 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1463 = torch.aten.view %1461, %1462 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1463, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_1119 = torch.constant.int 15 | |
%1464 = torch.prims.convert_element_type %1463, %int15_1119 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1464, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
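// Residual add: the FFN output is added back onto the block's residual stream.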
%int1_1120 = torch.constant.int 1 | |
%1465 = torch.aten.add.Tensor %1422, %1464, %int1_1120 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1465, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
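// RMSNorm in f32 (mean of squares over the hidden dim, eps = 1e-5, rsqrt), cast back to bf16 and scaled by the norm weight.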
%int6_1121 = torch.constant.int 6 | |
%1466 = torch.prims.convert_element_type %1465, %int6_1121 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1466, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_1122 = torch.constant.int 2 | |
%1467 = torch.aten.pow.Tensor_Scalar %1466, %int2_1122 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1467, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_1123 = torch.constant.int -1 | |
%1468 = torch.prim.ListConstruct %int-1_1123 : (!torch.int) -> !torch.list<int> | |
%true_1124 = torch.constant.bool true | |
%none_1125 = torch.constant.none | |
%1469 = torch.aten.mean.dim %1467, %1468, %true_1124, %none_1125 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1469, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_1126 = torch.constant.float 1.000000e-05 | |
%int1_1127 = torch.constant.int 1 | |
%1470 = torch.aten.add.Scalar %1469, %float1.000000e-05_1126, %int1_1127 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1470, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1471 = torch.aten.rsqrt %1470 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1471, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1472 = torch.aten.mul.Tensor %1466, %1471 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1472, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_1128 = torch.constant.int 15 | |
%1473 = torch.prims.convert_element_type %1472, %int15_1128 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1473, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1474 = torch.aten.mul.Tensor %52, %1473 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1474, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
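// Q projection: quantize the normalized activations to f8E4M3FNUZ (scale, clamp to +/-240) and matmul with the transposed 4096x4096 query weight, then dequantize to bf16.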
%1475 = torch.aten.div.Tensor %1474, %53 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1475, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1129 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1130 = torch.constant.float 2.400000e+02 | |
%1476 = torch.aten.clamp %1475, %float-2.400000e02_1129, %float2.400000e02_1130 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1476, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1131 = torch.constant.int 26 | |
%1477 = torch.prims.convert_element_type %1476, %int26_1131 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1477, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1132 = torch.constant.int -2 | |
%int-1_1133 = torch.constant.int -1 | |
%1478 = torch.aten.transpose.int %54, %int-2_1132, %int-1_1133 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_1134 = torch.constant.int 4096 | |
%1479 = torch.prim.ListConstruct %564, %int4096_1134 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1480 = torch.aten.view %1477, %1479 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1480, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1481 = torch.aten.mm %1480, %1478 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1481, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_1135 = torch.constant.int 1 | |
%int4096_1136 = torch.constant.int 4096 | |
%1482 = torch.prim.ListConstruct %int1_1135, %564, %int4096_1136 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1483 = torch.aten.view %1481, %1482 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1483, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_1137 = torch.constant.int 15 | |
%1484 = torch.prims.convert_element_type %1483, %int15_1137 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1484, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
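// K projection: same quantize/matmul/dequantize pattern against the 1024x4096 key weight (8 KV heads x 128).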
%1485 = torch.aten.div.Tensor %1474, %55 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1485, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1138 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1139 = torch.constant.float 2.400000e+02 | |
%1486 = torch.aten.clamp %1485, %float-2.400000e02_1138, %float2.400000e02_1139 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1486, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1140 = torch.constant.int 26 | |
%1487 = torch.prims.convert_element_type %1486, %int26_1140 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1487, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1141 = torch.constant.int -2 | |
%int-1_1142 = torch.constant.int -1 | |
%1488 = torch.aten.transpose.int %56, %int-2_1141, %int-1_1142 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_1143 = torch.constant.int 4096 | |
%1489 = torch.prim.ListConstruct %564, %int4096_1143 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1490 = torch.aten.view %1487, %1489 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1490, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1491 = torch.aten.mm %1490, %1488 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1491, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_1144 = torch.constant.int 1 | |
%int1024_1145 = torch.constant.int 1024 | |
%1492 = torch.prim.ListConstruct %int1_1144, %564, %int1024_1145 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1493 = torch.aten.view %1491, %1492 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1493, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_1146 = torch.constant.int 15 | |
%1494 = torch.prims.convert_element_type %1493, %int15_1146 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %1494, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
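// V projection: same pattern against the 1024x4096 value weight.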
%1495 = torch.aten.div.Tensor %1474, %57 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1495, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1147 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1148 = torch.constant.float 2.400000e+02 | |
%1496 = torch.aten.clamp %1495, %float-2.400000e02_1147, %float2.400000e02_1148 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1496, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1149 = torch.constant.int 26 | |
%1497 = torch.prims.convert_element_type %1496, %int26_1149 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1497, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1150 = torch.constant.int -2 | |
%int-1_1151 = torch.constant.int -1 | |
%1498 = torch.aten.transpose.int %58, %int-2_1150, %int-1_1151 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_1152 = torch.constant.int 4096 | |
%1499 = torch.prim.ListConstruct %564, %int4096_1152 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1500 = torch.aten.view %1497, %1499 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1500, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1501 = torch.aten.mm %1500, %1498 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1501, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_1153 = torch.constant.int 1 | |
%int1024_1154 = torch.constant.int 1024 | |
%1502 = torch.prim.ListConstruct %int1_1153, %564, %int1024_1154 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1503 = torch.aten.view %1501, %1502 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1503, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_1155 = torch.constant.int 15 | |
%1504 = torch.prims.convert_element_type %1503, %int15_1155 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %1504, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
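// Split projections into heads: Q -> [1, seq, 32, 128], K and V -> [1, seq, 8, 128] (grouped-query layout).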
%int1_1156 = torch.constant.int 1 | |
%int32_1157 = torch.constant.int 32 | |
%int128_1158 = torch.constant.int 128 | |
%1505 = torch.prim.ListConstruct %int1_1156, %564, %int32_1157, %int128_1158 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1506 = torch.aten.view %1484, %1505 : !torch.vtensor<[1,?,4096],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1506, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_1159 = torch.constant.int 1 | |
%int8_1160 = torch.constant.int 8 | |
%int128_1161 = torch.constant.int 128 | |
%1507 = torch.prim.ListConstruct %int1_1159, %564, %int8_1160, %int128_1161 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1508 = torch.aten.view %1494, %1507 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1508, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int1_1162 = torch.constant.int 1 | |
%int8_1163 = torch.constant.int 8 | |
%int128_1164 = torch.constant.int 128 | |
%1509 = torch.prim.ListConstruct %int1_1162, %564, %int8_1163, %int128_1164 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1510 = torch.aten.view %1504, %1509 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1510, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
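// Build rotary-embedding tables: inverse frequencies from base 5.0e5 over 64 even dims, with what appears to be Llama-3.1-style context scaling (factor 8, original context 8192, smoothing between wavelengths 2048 and 8192), then cos/sin tables of shape [131072,128] cast to bf16.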
%int131072_1165 = torch.constant.int 131072 | |
%none_1166 = torch.constant.none | |
%none_1167 = torch.constant.none | |
%cpu_1168 = torch.constant.device "cpu" | |
%false_1169 = torch.constant.bool false | |
%1511 = torch.aten.arange %int131072_1165, %none_1166, %none_1167, %cpu_1168, %false_1169 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_1170 = torch.constant.int 0 | |
%int128_1171 = torch.constant.int 128 | |
%int2_1172 = torch.constant.int 2 | |
%int4_1173 = torch.constant.int 4 | |
%none_1174 = torch.constant.none | |
%cpu_1175 = torch.constant.device "cpu" | |
%false_1176 = torch.constant.bool false | |
%1512 = torch.aten.arange.start_step %int0_1170, %int128_1171, %int2_1172, %int4_1173, %none_1174, %cpu_1175, %false_1176 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_1177 = torch.constant.int 6 | |
%1513 = torch.prims.convert_element_type %1512, %int6_1177 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_1178 = torch.constant.int 128 | |
%1514 = torch.aten.div.Scalar %1513, %int128_1178 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_1179 = torch.constant.float 5.000000e+05 | |
%1515 = torch.aten.pow.Scalar %float5.000000e05_1179, %1514 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1516 = torch.aten.reciprocal %1515 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_1180 = torch.constant.float 1.000000e+00 | |
%1517 = torch.aten.mul.Scalar %1516, %float1.000000e00_1180 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%1518 = torch.aten.reciprocal %1517 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_1181 = torch.constant.float 6.2831853071795862 | |
%1519 = torch.aten.mul.Scalar %1518, %float6.283190e00_1181 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_1182 = torch.constant.float 8.192000e+03 | |
%1520 = torch.aten.gt.Scalar %1519, %float8.192000e03_1182 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_1183 = torch.constant.int 8 | |
%1521 = torch.aten.div.Scalar %1517, %int8_1183 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1522 = torch.aten.where.self %1520, %1521, %1517 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1523 = torch.aten.reciprocal %1519 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_1184 = torch.constant.int 8192 | |
%1524 = torch.aten.mul.Scalar %1523, %int8192_1184 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_1185 = torch.constant.int 1 | |
%int1_1186 = torch.constant.int 1 | |
%1525 = torch.aten.sub.Scalar %1524, %int1_1185, %int1_1186 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_1187 = torch.constant.int 3 | |
%1526 = torch.aten.div.Scalar %1525, %int3_1187 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_1188 = torch.constant.int 1 | |
%int1_1189 = torch.constant.int 1 | |
%1527 = torch.aten.rsub.Scalar %1526, %int1_1188, %int1_1189 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%1528 = torch.aten.mul.Tensor %1527, %1522 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_1190 = torch.constant.int 8 | |
%1529 = torch.aten.div.Scalar %1528, %int8_1190 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1530 = torch.aten.mul.Tensor %1526, %1522 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_1191 = torch.constant.int 1 | |
%1531 = torch.aten.add.Tensor %1529, %1530, %int1_1191 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_1192 = torch.constant.float 2.048000e+03 | |
%1532 = torch.aten.lt.Scalar %1519, %float2.048000e03_1192 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1533 = torch.aten.bitwise_not %1532 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_1193 = torch.constant.float 8.192000e+03 | |
%1534 = torch.aten.gt.Scalar %1519, %float8.192000e03_1193 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1535 = torch.aten.bitwise_not %1534 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1536 = torch.aten.mul.Tensor %1533, %1535 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1537 = torch.aten.where.self %1536, %1531, %1522 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1538 = torch.prim.ListConstruct %1537, %1537 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_1194 = torch.constant.int -1 | |
%1539 = torch.aten.cat %1538, %int-1_1194 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_1195 = torch.constant.int 6 | |
%1540 = torch.prims.convert_element_type %1511, %int6_1195 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_1196 = torch.constant.int 131072 | |
%int1_1197 = torch.constant.int 1 | |
%1541 = torch.prim.ListConstruct %int131072_1196, %int1_1197 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1542 = torch.aten.view %1540, %1541 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%1543 = torch.aten.mul.Tensor %1542, %1539 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%1544 = torch.aten.cos %1543 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_1198 = torch.constant.int 15 | |
%1545 = torch.prims.convert_element_type %1544, %int15_1198 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%1546 = torch.aten.sin %1543 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_1199 = torch.constant.int 15 | |
%1547 = torch.prims.convert_element_type %1546, %int15_1199 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
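// Slice the cos/sin tables to the current sequence length and reshape to [1, seq, 1, 128] for broadcasting over heads.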
%int1_1200 = torch.constant.int 1 | |
%1548 = torch.aten.size.int %1483, %int1_1200 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_1201 = torch.constant.int 0 | |
%1549 = torch.aten.add.int %int0_1201, %1548 : !torch.int, !torch.int -> !torch.int | |
%int0_1202 = torch.constant.int 0 | |
%int0_1203 = torch.constant.int 0 | |
%int1_1204 = torch.constant.int 1 | |
%1550 = torch.aten.slice.Tensor %1545, %int0_1202, %int0_1203, %1549, %int1_1204 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1550, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_1205 = torch.constant.int 1 | |
%int0_1206 = torch.constant.int 0 | |
%int9223372036854775807_1207 = torch.constant.int 9223372036854775807 | |
%int1_1208 = torch.constant.int 1 | |
%1551 = torch.aten.slice.Tensor %1550, %int1_1205, %int0_1206, %int9223372036854775807_1207, %int1_1208 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1551, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_1209 = torch.constant.int 0 | |
%1552 = torch.aten.add.int %int0_1209, %1548 : !torch.int, !torch.int -> !torch.int | |
%int0_1210 = torch.constant.int 0 | |
%int0_1211 = torch.constant.int 0 | |
%int1_1212 = torch.constant.int 1 | |
%1553 = torch.aten.slice.Tensor %1547, %int0_1210, %int0_1211, %1552, %int1_1212 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1553, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_1213 = torch.constant.int 1 | |
%int0_1214 = torch.constant.int 0 | |
%int9223372036854775807_1215 = torch.constant.int 9223372036854775807 | |
%int1_1216 = torch.constant.int 1 | |
%1554 = torch.aten.slice.Tensor %1553, %int1_1213, %int0_1214, %int9223372036854775807_1215, %int1_1216 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1554, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_1217 = torch.constant.int 0 | |
%1555 = torch.aten.unsqueeze %1551, %int0_1217 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1555, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_1218 = torch.constant.int 1 | |
%int0_1219 = torch.constant.int 0 | |
%int9223372036854775807_1220 = torch.constant.int 9223372036854775807 | |
%int1_1221 = torch.constant.int 1 | |
%1556 = torch.aten.slice.Tensor %1555, %int1_1218, %int0_1219, %int9223372036854775807_1220, %int1_1221 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1556, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_1222 = torch.constant.int 2 | |
%1557 = torch.aten.unsqueeze %1556, %int2_1222 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1557, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_1223 = torch.constant.int 3 | |
%int0_1224 = torch.constant.int 0 | |
%int9223372036854775807_1225 = torch.constant.int 9223372036854775807 | |
%int1_1226 = torch.constant.int 1 | |
%1558 = torch.aten.slice.Tensor %1557, %int3_1223, %int0_1224, %int9223372036854775807_1225, %int1_1226 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1558, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_1227 = torch.constant.int 0 | |
%1559 = torch.aten.unsqueeze %1554, %int0_1227 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1559, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_1228 = torch.constant.int 1 | |
%int0_1229 = torch.constant.int 0 | |
%int9223372036854775807_1230 = torch.constant.int 9223372036854775807 | |
%int1_1231 = torch.constant.int 1 | |
%1560 = torch.aten.slice.Tensor %1559, %int1_1228, %int0_1229, %int9223372036854775807_1230, %int1_1231 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1560, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_1232 = torch.constant.int 2 | |
%1561 = torch.aten.unsqueeze %1560, %int2_1232 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1561, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_1233 = torch.constant.int 3 | |
%int0_1234 = torch.constant.int 0 | |
%int9223372036854775807_1235 = torch.constant.int 9223372036854775807 | |
%int1_1236 = torch.constant.int 1 | |
%1562 = torch.aten.slice.Tensor %1561, %int3_1233, %int0_1234, %int9223372036854775807_1235, %int1_1236 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1562, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
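// Apply RoPE to the query heads: broadcast cos/sin to [1,1,seq,128], form rotate_half(q) by negating the upper 64 dims and concatenating, combine q*cos + rotate_half(q)*sin, then transpose back to [1,seq,32,128].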
%int1_1237 = torch.constant.int 1 | |
%int2_1238 = torch.constant.int 2 | |
%1563 = torch.aten.transpose.int %1558, %int1_1237, %int2_1238 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1563, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1239 = torch.constant.int 1 | |
%int1_1240 = torch.constant.int 1 | |
%int1_1241 = torch.constant.int 1 | |
%int1_1242 = torch.constant.int 1 | |
%1564 = torch.prim.ListConstruct %int1_1239, %int1_1240, %int1_1241, %int1_1242 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1565 = torch.aten.repeat %1563, %1564 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1565, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1243 = torch.constant.int 1 | |
%int2_1244 = torch.constant.int 2 | |
%1566 = torch.aten.transpose.int %1562, %int1_1243, %int2_1244 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1566, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1245 = torch.constant.int 1 | |
%int2_1246 = torch.constant.int 2 | |
%1567 = torch.aten.transpose.int %1506, %int1_1245, %int2_1246 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1567, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1247 = torch.constant.int 1 | |
%int1_1248 = torch.constant.int 1 | |
%int1_1249 = torch.constant.int 1 | |
%int1_1250 = torch.constant.int 1 | |
%1568 = torch.prim.ListConstruct %int1_1247, %int1_1248, %int1_1249, %int1_1250 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1569 = torch.aten.repeat %1566, %1568 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1569, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%1570 = torch.aten.mul.Tensor %1567, %1565 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1570, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int3_1251 = torch.constant.int 3 | |
%int0_1252 = torch.constant.int 0 | |
%int64_1253 = torch.constant.int 64 | |
%int1_1254 = torch.constant.int 1 | |
%1571 = torch.aten.slice.Tensor %1567, %int3_1251, %int0_1252, %int64_1253, %int1_1254 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %1571, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%int3_1255 = torch.constant.int 3 | |
%int64_1256 = torch.constant.int 64 | |
%int9223372036854775807_1257 = torch.constant.int 9223372036854775807 | |
%int1_1258 = torch.constant.int 1 | |
%1572 = torch.aten.slice.Tensor %1567, %int3_1255, %int64_1256, %int9223372036854775807_1257, %int1_1258 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %1572, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%1573 = torch.aten.neg %1572 : !torch.vtensor<[1,32,?,64],bf16> -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %1573, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%1574 = torch.prim.ListConstruct %1573, %1571 : (!torch.vtensor<[1,32,?,64],bf16>, !torch.vtensor<[1,32,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_1259 = torch.constant.int -1 | |
%1575 = torch.aten.cat %1574, %int-1_1259 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1575, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%1576 = torch.aten.mul.Tensor %1575, %1569 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1576, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1260 = torch.constant.int 1 | |
%1577 = torch.aten.add.Tensor %1570, %1576, %int1_1260 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1577, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1261 = torch.constant.int 1 | |
%int2_1262 = torch.constant.int 2 | |
%1578 = torch.aten.transpose.int %1577, %int1_1261, %int2_1262 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1578, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
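// The same rotary tables are recomputed from scratch (identical constants) and sliced again for the key path.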
%int131072_1263 = torch.constant.int 131072 | |
%none_1264 = torch.constant.none | |
%none_1265 = torch.constant.none | |
%cpu_1266 = torch.constant.device "cpu" | |
%false_1267 = torch.constant.bool false | |
%1579 = torch.aten.arange %int131072_1263, %none_1264, %none_1265, %cpu_1266, %false_1267 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_1268 = torch.constant.int 0 | |
%int128_1269 = torch.constant.int 128 | |
%int2_1270 = torch.constant.int 2 | |
%int4_1271 = torch.constant.int 4 | |
%none_1272 = torch.constant.none | |
%cpu_1273 = torch.constant.device "cpu" | |
%false_1274 = torch.constant.bool false | |
%1580 = torch.aten.arange.start_step %int0_1268, %int128_1269, %int2_1270, %int4_1271, %none_1272, %cpu_1273, %false_1274 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_1275 = torch.constant.int 6 | |
%1581 = torch.prims.convert_element_type %1580, %int6_1275 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_1276 = torch.constant.int 128 | |
%1582 = torch.aten.div.Scalar %1581, %int128_1276 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_1277 = torch.constant.float 5.000000e+05 | |
%1583 = torch.aten.pow.Scalar %float5.000000e05_1277, %1582 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1584 = torch.aten.reciprocal %1583 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_1278 = torch.constant.float 1.000000e+00 | |
%1585 = torch.aten.mul.Scalar %1584, %float1.000000e00_1278 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%1586 = torch.aten.reciprocal %1585 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_1279 = torch.constant.float 6.2831853071795862 | |
%1587 = torch.aten.mul.Scalar %1586, %float6.283190e00_1279 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_1280 = torch.constant.float 8.192000e+03 | |
%1588 = torch.aten.gt.Scalar %1587, %float8.192000e03_1280 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_1281 = torch.constant.int 8 | |
%1589 = torch.aten.div.Scalar %1585, %int8_1281 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1590 = torch.aten.where.self %1588, %1589, %1585 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1591 = torch.aten.reciprocal %1587 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_1282 = torch.constant.int 8192 | |
%1592 = torch.aten.mul.Scalar %1591, %int8192_1282 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_1283 = torch.constant.int 1 | |
%int1_1284 = torch.constant.int 1 | |
%1593 = torch.aten.sub.Scalar %1592, %int1_1283, %int1_1284 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_1285 = torch.constant.int 3 | |
%1594 = torch.aten.div.Scalar %1593, %int3_1285 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_1286 = torch.constant.int 1 | |
%int1_1287 = torch.constant.int 1 | |
%1595 = torch.aten.rsub.Scalar %1594, %int1_1286, %int1_1287 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%1596 = torch.aten.mul.Tensor %1595, %1590 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_1288 = torch.constant.int 8 | |
%1597 = torch.aten.div.Scalar %1596, %int8_1288 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1598 = torch.aten.mul.Tensor %1594, %1590 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_1289 = torch.constant.int 1 | |
%1599 = torch.aten.add.Tensor %1597, %1598, %int1_1289 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_1290 = torch.constant.float 2.048000e+03 | |
%1600 = torch.aten.lt.Scalar %1587, %float2.048000e03_1290 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1601 = torch.aten.bitwise_not %1600 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_1291 = torch.constant.float 8.192000e+03 | |
%1602 = torch.aten.gt.Scalar %1587, %float8.192000e03_1291 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1603 = torch.aten.bitwise_not %1602 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1604 = torch.aten.mul.Tensor %1601, %1603 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1605 = torch.aten.where.self %1604, %1599, %1590 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1606 = torch.prim.ListConstruct %1605, %1605 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_1292 = torch.constant.int -1 | |
%1607 = torch.aten.cat %1606, %int-1_1292 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_1293 = torch.constant.int 6 | |
%1608 = torch.prims.convert_element_type %1579, %int6_1293 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_1294 = torch.constant.int 131072 | |
%int1_1295 = torch.constant.int 1 | |
%1609 = torch.prim.ListConstruct %int131072_1294, %int1_1295 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1610 = torch.aten.view %1608, %1609 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%1611 = torch.aten.mul.Tensor %1610, %1607 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%1612 = torch.aten.cos %1611 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_1296 = torch.constant.int 15 | |
%1613 = torch.prims.convert_element_type %1612, %int15_1296 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%1614 = torch.aten.sin %1611 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_1297 = torch.constant.int 15 | |
%1615 = torch.prims.convert_element_type %1614, %int15_1297 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%int1_1298 = torch.constant.int 1 | |
%1616 = torch.aten.size.int %1493, %int1_1298 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_1299 = torch.constant.int 0 | |
%1617 = torch.aten.add.int %int0_1299, %1616 : !torch.int, !torch.int -> !torch.int | |
%int0_1300 = torch.constant.int 0 | |
%int0_1301 = torch.constant.int 0 | |
%int1_1302 = torch.constant.int 1 | |
%1618 = torch.aten.slice.Tensor %1613, %int0_1300, %int0_1301, %1617, %int1_1302 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1618, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_1303 = torch.constant.int 1 | |
%int0_1304 = torch.constant.int 0 | |
%int9223372036854775807_1305 = torch.constant.int 9223372036854775807 | |
%int1_1306 = torch.constant.int 1 | |
%1619 = torch.aten.slice.Tensor %1618, %int1_1303, %int0_1304, %int9223372036854775807_1305, %int1_1306 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1619, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_1307 = torch.constant.int 0 | |
%1620 = torch.aten.add.int %int0_1307, %1616 : !torch.int, !torch.int -> !torch.int | |
%int0_1308 = torch.constant.int 0 | |
%int0_1309 = torch.constant.int 0 | |
%int1_1310 = torch.constant.int 1 | |
%1621 = torch.aten.slice.Tensor %1615, %int0_1308, %int0_1309, %1620, %int1_1310 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1621, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_1311 = torch.constant.int 1 | |
%int0_1312 = torch.constant.int 0 | |
%int9223372036854775807_1313 = torch.constant.int 9223372036854775807 | |
%int1_1314 = torch.constant.int 1 | |
%1622 = torch.aten.slice.Tensor %1621, %int1_1311, %int0_1312, %int9223372036854775807_1313, %int1_1314 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1622, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_1315 = torch.constant.int 0 | |
%1623 = torch.aten.unsqueeze %1619, %int0_1315 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1623, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_1316 = torch.constant.int 1 | |
%int0_1317 = torch.constant.int 0 | |
%int9223372036854775807_1318 = torch.constant.int 9223372036854775807 | |
%int1_1319 = torch.constant.int 1 | |
%1624 = torch.aten.slice.Tensor %1623, %int1_1316, %int0_1317, %int9223372036854775807_1318, %int1_1319 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1624, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_1320 = torch.constant.int 2 | |
%1625 = torch.aten.unsqueeze %1624, %int2_1320 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1625, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_1321 = torch.constant.int 3 | |
%int0_1322 = torch.constant.int 0 | |
%int9223372036854775807_1323 = torch.constant.int 9223372036854775807 | |
%int1_1324 = torch.constant.int 1 | |
%1626 = torch.aten.slice.Tensor %1625, %int3_1321, %int0_1322, %int9223372036854775807_1323, %int1_1324 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1626, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_1325 = torch.constant.int 0 | |
%1627 = torch.aten.unsqueeze %1622, %int0_1325 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1627, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_1326 = torch.constant.int 1 | |
%int0_1327 = torch.constant.int 0 | |
%int9223372036854775807_1328 = torch.constant.int 9223372036854775807 | |
%int1_1329 = torch.constant.int 1 | |
%1628 = torch.aten.slice.Tensor %1627, %int1_1326, %int0_1327, %int9223372036854775807_1328, %int1_1329 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1628, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_1330 = torch.constant.int 2 | |
%1629 = torch.aten.unsqueeze %1628, %int2_1330 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1629, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_1331 = torch.constant.int 3 | |
%int0_1332 = torch.constant.int 0 | |
%int9223372036854775807_1333 = torch.constant.int 9223372036854775807 | |
%int1_1334 = torch.constant.int 1 | |
%1630 = torch.aten.slice.Tensor %1629, %int3_1331, %int0_1332, %int9223372036854775807_1333, %int1_1334 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1630, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
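// Apply RoPE to the key heads (8 KV heads), mirroring the query rotation, then transpose back to [1,seq,8,128].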
%int1_1335 = torch.constant.int 1 | |
%int2_1336 = torch.constant.int 2 | |
%1631 = torch.aten.transpose.int %1626, %int1_1335, %int2_1336 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1631, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1337 = torch.constant.int 1 | |
%int1_1338 = torch.constant.int 1 | |
%int1_1339 = torch.constant.int 1 | |
%int1_1340 = torch.constant.int 1 | |
%1632 = torch.prim.ListConstruct %int1_1337, %int1_1338, %int1_1339, %int1_1340 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1633 = torch.aten.repeat %1631, %1632 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1633, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1341 = torch.constant.int 1 | |
%int2_1342 = torch.constant.int 2 | |
%1634 = torch.aten.transpose.int %1630, %int1_1341, %int2_1342 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1634, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1343 = torch.constant.int 1 | |
%int2_1344 = torch.constant.int 2 | |
%1635 = torch.aten.transpose.int %1508, %int1_1343, %int2_1344 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1635, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_1345 = torch.constant.int 1 | |
%int1_1346 = torch.constant.int 1 | |
%int1_1347 = torch.constant.int 1 | |
%int1_1348 = torch.constant.int 1 | |
%1636 = torch.prim.ListConstruct %int1_1345, %int1_1346, %int1_1347, %int1_1348 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1637 = torch.aten.repeat %1634, %1636 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1637, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%1638 = torch.aten.mul.Tensor %1635, %1633 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1638, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int3_1349 = torch.constant.int 3 | |
%int0_1350 = torch.constant.int 0 | |
%int64_1351 = torch.constant.int 64 | |
%int1_1352 = torch.constant.int 1 | |
%1639 = torch.aten.slice.Tensor %1635, %int3_1349, %int0_1350, %int64_1351, %int1_1352 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1639, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%int3_1353 = torch.constant.int 3 | |
%int64_1354 = torch.constant.int 64 | |
%int9223372036854775807_1355 = torch.constant.int 9223372036854775807 | |
%int1_1356 = torch.constant.int 1 | |
%1640 = torch.aten.slice.Tensor %1635, %int3_1353, %int64_1354, %int9223372036854775807_1355, %int1_1356 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1640, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%1641 = torch.aten.neg %1640 : !torch.vtensor<[1,8,?,64],bf16> -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1641, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%1642 = torch.prim.ListConstruct %1641, %1639 : (!torch.vtensor<[1,8,?,64],bf16>, !torch.vtensor<[1,8,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_1357 = torch.constant.int -1 | |
%1643 = torch.aten.cat %1642, %int-1_1357 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1643, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%1644 = torch.aten.mul.Tensor %1643, %1637 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1644, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_1358 = torch.constant.int 1 | |
%1645 = torch.aten.add.Tensor %1638, %1644, %int1_1358 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,8,?,128],bf16>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1645, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_1359 = torch.constant.int 1 | |
%int2_1360 = torch.constant.int 2 | |
%1646 = torch.aten.transpose.int %1645, %int1_1359, %int2_1360 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1646, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
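// Quantize the rotated keys and the values to f8E4M3FNUZ with the KV-cache quantizer scale (clamp to +/-240) before writing them into the cache.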
%1647 = torch.aten.div.Tensor %1646, %59 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1647, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_1361 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1362 = torch.constant.float 2.400000e+02 | |
%1648 = torch.aten.clamp %1647, %float-2.400000e02_1361, %float2.400000e02_1362 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1648, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_1363 = torch.constant.int 26 | |
%1649 = torch.prims.convert_element_type %1648, %int26_1363 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1649, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
%1650 = torch.aten.div.Tensor %1510, %59 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1650, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_1364 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1365 = torch.constant.float 2.400000e+02 | |
%1651 = torch.aten.clamp %1650, %float-2.400000e02_1364, %float2.400000e02_1365 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1651, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_1366 = torch.constant.int 26 | |
%1652 = torch.prims.convert_element_type %1651, %int26_1366 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1652, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
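// Compute flat KV-cache slot indices from the page table %arg2: page_id * 64 + a fixed offset (64 = 32 layers x 2 K/V planes per page; the offset presumably selects this layer's K plane).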
%int64_1367 = torch.constant.int 64 | |
%1653 = torch.aten.mul.Scalar %arg2, %int64_1367 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1653, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int6_1368 = torch.constant.int 6 | |
%int1_1369 = torch.constant.int 1 | |
%1654 = torch.aten.add.Scalar %1653, %int6_1368, %int1_1369 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1654, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int1_1370 = torch.constant.int 1 | |
%int32_1371 = torch.constant.int 32 | |
%int8_1372 = torch.constant.int 8 | |
%int128_1373 = torch.constant.int 128 | |
%1655 = torch.prim.ListConstruct %int1_1370, %748, %int32_1371, %int8_1372, %int128_1373 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1656 = torch.aten.view %1649, %1655 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1656, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_1374 = torch.constant.int 32 | |
%int8_1375 = torch.constant.int 8 | |
%int128_1376 = torch.constant.int 128 | |
%1657 = torch.prim.ListConstruct %748, %int32_1374, %int8_1375, %int128_1376 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1658 = torch.aten.view %1656, %1657 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1658, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%1659 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%1660 = torch.aten.view %1654, %1659 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %1660, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
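// Write K: view the flat cache as [pages, 32, 2, 32, 8, 128], collapse the leading dims, scatter the quantized key pages with index_put, and restore the [?, 2097152] cache view.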
%int32_1377 = torch.constant.int 32 | |
%int2_1378 = torch.constant.int 2 | |
%int32_1379 = torch.constant.int 32 | |
%int8_1380 = torch.constant.int 8 | |
%int128_1381 = torch.constant.int 128 | |
%1661 = torch.prim.ListConstruct %739, %int32_1377, %int2_1378, %int32_1379, %int8_1380, %int128_1381 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1662 = torch.aten.view %1385, %1661 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1662, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_1382 = torch.constant.int 32 | |
%1663 = torch.aten.mul.int %739, %int32_1382 : !torch.int, !torch.int -> !torch.int | |
%int2_1383 = torch.constant.int 2 | |
%1664 = torch.aten.mul.int %1663, %int2_1383 : !torch.int, !torch.int -> !torch.int | |
%int32_1384 = torch.constant.int 32 | |
%int8_1385 = torch.constant.int 8 | |
%int128_1386 = torch.constant.int 128 | |
%1665 = torch.prim.ListConstruct %1664, %int32_1384, %int8_1385, %int128_1386 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1666 = torch.aten.view %1662, %1665 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1666, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%1667 = torch.prim.ListConstruct %1660 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_1387 = torch.constant.bool false | |
%1668 = torch.aten.index_put %1666, %1667, %1658, %false_1387 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1668, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_1388 = torch.constant.int 32 | |
%int2_1389 = torch.constant.int 2 | |
%int32_1390 = torch.constant.int 32 | |
%int8_1391 = torch.constant.int 8 | |
%int128_1392 = torch.constant.int 128 | |
%1669 = torch.prim.ListConstruct %739, %int32_1388, %int2_1389, %int32_1390, %int8_1391, %int128_1392 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1670 = torch.aten.view %1668, %1669 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1670, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_1393 = torch.constant.int 2097152 | |
%1671 = torch.prim.ListConstruct %739, %int2097152_1393 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1672 = torch.aten.view %1670, %1671 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1672, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
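// KV-cache write, V partition: the same reinterpret/scatter sequence is repeated, writing the new V pages
// at slot id + 1 (%1681), i.e. what looks like the V partition adjacent to the K slot written above.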
%int32_1394 = torch.constant.int 32 | |
%int2_1395 = torch.constant.int 2 | |
%int32_1396 = torch.constant.int 32 | |
%int8_1397 = torch.constant.int 8 | |
%int128_1398 = torch.constant.int 128 | |
%1673 = torch.prim.ListConstruct %739, %int32_1394, %int2_1395, %int32_1396, %int8_1397, %int128_1398 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1674 = torch.aten.view %1672, %1673 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1674, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_1399 = torch.constant.int 32 | |
%int8_1400 = torch.constant.int 8 | |
%int128_1401 = torch.constant.int 128 | |
%1675 = torch.prim.ListConstruct %1664, %int32_1399, %int8_1400, %int128_1401 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1676 = torch.aten.view %1674, %1675 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1676, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_1402 = torch.constant.int 1 | |
%int32_1403 = torch.constant.int 32 | |
%int8_1404 = torch.constant.int 8 | |
%int128_1405 = torch.constant.int 128 | |
%1677 = torch.prim.ListConstruct %int1_1402, %748, %int32_1403, %int8_1404, %int128_1405 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1678 = torch.aten.view %1652, %1677 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1678, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_1406 = torch.constant.int 32 | |
%int8_1407 = torch.constant.int 8 | |
%int128_1408 = torch.constant.int 128 | |
%1679 = torch.prim.ListConstruct %748, %int32_1406, %int8_1407, %int128_1408 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1680 = torch.aten.view %1678, %1679 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1680, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_1409 = torch.constant.int 1 | |
%int1_1410 = torch.constant.int 1 | |
%1681 = torch.aten.add.Scalar %1654, %int1_1409, %int1_1410 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1681, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%1682 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%1683 = torch.aten.view %1681, %1682 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %1683, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%1684 = torch.prim.ListConstruct %1683 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_1411 = torch.constant.bool false | |
%1685 = torch.aten.index_put %1676, %1684, %1680, %false_1411 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1685, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_1412 = torch.constant.int 32 | |
%int2_1413 = torch.constant.int 2 | |
%int32_1414 = torch.constant.int 32 | |
%int8_1415 = torch.constant.int 8 | |
%int128_1416 = torch.constant.int 128 | |
%1686 = torch.prim.ListConstruct %739, %int32_1412, %int2_1413, %int32_1414, %int8_1415, %int128_1416 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1687 = torch.aten.view %1685, %1686 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1687, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_1417 = torch.constant.int 2097152 | |
%1688 = torch.prim.ListConstruct %739, %int2097152_1417 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1689 = torch.aten.view %1687, %1688 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1689, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
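// %1689 is the updated flat KV cache. Next, grouped-query attention: the 8 KV heads of the new K are
// repeated 4x (unsqueeze -> expand -> clone -> _unsafe_view) to match the 32 query heads.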
%int-2_1418 = torch.constant.int -2 | |
%1690 = torch.aten.unsqueeze %1649, %int-2_1418 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1690, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_1419 = torch.constant.int 1 | |
%int8_1420 = torch.constant.int 8 | |
%int4_1421 = torch.constant.int 4 | |
%int128_1422 = torch.constant.int 128 | |
%1691 = torch.prim.ListConstruct %int1_1419, %1616, %int8_1420, %int4_1421, %int128_1422 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_1423 = torch.constant.bool false | |
%1692 = torch.aten.expand %1690, %1691, %false_1423 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1692, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_1424 = torch.constant.int 0 | |
%1693 = torch.aten.clone %1692, %int0_1424 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1693, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_1425 = torch.constant.int 1 | |
%int32_1426 = torch.constant.int 32 | |
%int128_1427 = torch.constant.int 128 | |
%1694 = torch.prim.ListConstruct %int1_1425, %1616, %int32_1426, %int128_1427 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1695 = torch.aten._unsafe_view %1693, %1694 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1695, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
%int-2_1428 = torch.constant.int -2 | |
%1696 = torch.aten.unsqueeze %1652, %int-2_1428 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1696, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_1429 = torch.constant.int 1 | |
%1697 = torch.aten.size.int %1503, %int1_1429 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int1_1430 = torch.constant.int 1 | |
%int8_1431 = torch.constant.int 8 | |
%int4_1432 = torch.constant.int 4 | |
%int128_1433 = torch.constant.int 128 | |
%1698 = torch.prim.ListConstruct %int1_1430, %1697, %int8_1431, %int4_1432, %int128_1433 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_1434 = torch.constant.bool false | |
%1699 = torch.aten.expand %1696, %1698, %false_1434 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1699, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_1435 = torch.constant.int 0 | |
%1700 = torch.aten.clone %1699, %int0_1435 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1700, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_1436 = torch.constant.int 1 | |
%int32_1437 = torch.constant.int 32 | |
%int128_1438 = torch.constant.int 128 | |
%1701 = torch.prim.ListConstruct %int1_1436, %1697, %int32_1437, %int128_1438 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1702 = torch.aten._unsafe_view %1700, %1701 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1702, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
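// Dequantize the expanded K and V: upcast f8E4M3FNUZ -> f32, multiply by %59 (presumably the per-tensor
// KV-cache quantizer scale), and truncate to bf16 for attention.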
%int6_1439 = torch.constant.int 6 | |
%1703 = torch.prims.convert_element_type %1695, %int6_1439 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1703, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%1704 = torch.aten.mul.Tensor %1703, %59 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1704, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_1440 = torch.constant.int 15 | |
%1705 = torch.prims.convert_element_type %1704, %int15_1440 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1705, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int6_1441 = torch.constant.int 6 | |
%1706 = torch.prims.convert_element_type %1702, %int6_1441 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1706, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%1707 = torch.aten.mul.Tensor %1706, %59 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %1707, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_1442 = torch.constant.int 15 | |
%1708 = torch.prims.convert_element_type %1707, %int15_1442 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1708, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
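// Move heads to dim 1 ([1, 32, seq, 128]) for Q (%1578), K and V, then run causal flash attention in bf16.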
%int1_1443 = torch.constant.int 1 | |
%int2_1444 = torch.constant.int 2 | |
%1709 = torch.aten.transpose.int %1578, %int1_1443, %int2_1444 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1709, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1445 = torch.constant.int 1 | |
%int2_1446 = torch.constant.int 2 | |
%1710 = torch.aten.transpose.int %1705, %int1_1445, %int2_1446 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1710, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1447 = torch.constant.int 1 | |
%int2_1448 = torch.constant.int 2 | |
%1711 = torch.aten.transpose.int %1708, %int1_1447, %int2_1448 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1711, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%float0.000000e00_1449 = torch.constant.float 0.000000e+00 | |
%true_1450 = torch.constant.bool true | |
%none_1451 = torch.constant.none | |
%none_1452 = torch.constant.none | |
%1712:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%1709, %1710, %1711, %float0.000000e00_1449, %true_1450, %none_1451, %none_1452) : (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?],f32>) | |
torch.bind_symbolic_shape %1712#0, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1453 = torch.constant.int 1 | |
%int2_1454 = torch.constant.int 2 | |
%1713 = torch.aten.transpose.int %1712#0, %int1_1453, %int2_1454 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1713, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
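// Merge heads back to [1, seq, 4096], re-quantize to f8E4M3FNUZ (divide by the input scale %60, clamp to
// +/-240, the f8E4M3FNUZ maximum), and apply the 4096x4096 attention output projection.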
%int1_1455 = torch.constant.int 1 | |
%int4096_1456 = torch.constant.int 4096 | |
%1714 = torch.prim.ListConstruct %int1_1455, %1548, %int4096_1456 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1715 = torch.aten.view %1713, %1714 : !torch.vtensor<[1,?,32,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1715, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1716 = torch.aten.div.Tensor %1715, %60 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1716, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1457 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1458 = torch.constant.float 2.400000e+02 | |
%1717 = torch.aten.clamp %1716, %float-2.400000e02_1457, %float2.400000e02_1458 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1717, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1459 = torch.constant.int 26 | |
%1718 = torch.prims.convert_element_type %1717, %int26_1459 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1718, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1460 = torch.constant.int -2 | |
%int-1_1461 = torch.constant.int -1 | |
%1719 = torch.aten.transpose.int %61, %int-2_1460, %int-1_1461 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_1462 = torch.constant.int 4096 | |
%1720 = torch.prim.ListConstruct %1548, %int4096_1462 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1721 = torch.aten.view %1718, %1720 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1721, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1722 = torch.aten.mm %1721, %1719 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1722, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_1463 = torch.constant.int 1 | |
%int4096_1464 = torch.constant.int 4096 | |
%1723 = torch.prim.ListConstruct %int1_1463, %1548, %int4096_1464 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1724 = torch.aten.view %1722, %1723 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1724, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_1465 = torch.constant.int 15 | |
%1725 = torch.prims.convert_element_type %1724, %int15_1465 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1725, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_1466 = torch.constant.int 1 | |
%1726 = torch.aten.add.Tensor %1465, %1725, %int1_1466 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1726, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
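// %1726 is the post-attention residual. RMSNorm for the FFN follows: mean of squares over the last dim,
// eps = 1.0e-5, rsqrt, then scale by the ffn_norm weight (%62).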
%int6_1467 = torch.constant.int 6 | |
%1727 = torch.prims.convert_element_type %1726, %int6_1467 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1727, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_1468 = torch.constant.int 2 | |
%1728 = torch.aten.pow.Tensor_Scalar %1727, %int2_1468 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1728, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_1469 = torch.constant.int -1 | |
%1729 = torch.prim.ListConstruct %int-1_1469 : (!torch.int) -> !torch.list<int> | |
%true_1470 = torch.constant.bool true | |
%none_1471 = torch.constant.none | |
%1730 = torch.aten.mean.dim %1728, %1729, %true_1470, %none_1471 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1730, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_1472 = torch.constant.float 1.000000e-05 | |
%int1_1473 = torch.constant.int 1 | |
%1731 = torch.aten.add.Scalar %1730, %float1.000000e-05_1472, %int1_1473 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1731, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1732 = torch.aten.rsqrt %1731 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1732, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1733 = torch.aten.mul.Tensor %1727, %1732 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1733, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_1474 = torch.constant.int 15 | |
%1734 = torch.prims.convert_element_type %1733, %int15_1474 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1734, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1735 = torch.aten.mul.Tensor %62, %1734 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1735, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
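// SwiGLU feed-forward: each path quantizes the normed activations (divide by its input scale, clamp to
// +/-240, cast to f8E4M3FNUZ), computes gate = silu(x @ W_gate^T) and up = x @ W_up^T in the 14336-wide
// space, multiplies them, re-quantizes, and projects back down to 4096 through the ffn_down weight.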
%1736 = torch.aten.div.Tensor %1735, %63 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1736, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1475 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1476 = torch.constant.float 2.400000e+02 | |
%1737 = torch.aten.clamp %1736, %float-2.400000e02_1475, %float2.400000e02_1476 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1737, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1477 = torch.constant.int 26 | |
%1738 = torch.prims.convert_element_type %1737, %int26_1477 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1738, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1478 = torch.constant.int -2 | |
%int-1_1479 = torch.constant.int -1 | |
%1739 = torch.aten.transpose.int %64, %int-2_1478, %int-1_1479 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_1480 = torch.constant.int 4096 | |
%1740 = torch.prim.ListConstruct %564, %int4096_1480 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1741 = torch.aten.view %1738, %1740 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1741, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1742 = torch.aten.mm %1741, %1739 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1742, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_1481 = torch.constant.int 1 | |
%int14336_1482 = torch.constant.int 14336 | |
%1743 = torch.prim.ListConstruct %int1_1481, %564, %int14336_1482 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1744 = torch.aten.view %1742, %1743 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1744, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_1483 = torch.constant.int 15 | |
%1745 = torch.prims.convert_element_type %1744, %int15_1483 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1745, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%1746 = torch.aten.silu %1745 : !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1746, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%1747 = torch.aten.div.Tensor %1735, %65 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1747, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1484 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1485 = torch.constant.float 2.400000e+02 | |
%1748 = torch.aten.clamp %1747, %float-2.400000e02_1484, %float2.400000e02_1485 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1748, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1486 = torch.constant.int 26 | |
%1749 = torch.prims.convert_element_type %1748, %int26_1486 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1749, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1487 = torch.constant.int -2 | |
%int-1_1488 = torch.constant.int -1 | |
%1750 = torch.aten.transpose.int %66, %int-2_1487, %int-1_1488 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_1489 = torch.constant.int 4096 | |
%1751 = torch.prim.ListConstruct %564, %int4096_1489 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1752 = torch.aten.view %1749, %1751 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1752, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1753 = torch.aten.mm %1752, %1750 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1753, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_1490 = torch.constant.int 1 | |
%int14336_1491 = torch.constant.int 14336 | |
%1754 = torch.prim.ListConstruct %int1_1490, %564, %int14336_1491 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1755 = torch.aten.view %1753, %1754 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1755, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_1492 = torch.constant.int 15 | |
%1756 = torch.prims.convert_element_type %1755, %int15_1492 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1756, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%1757 = torch.aten.mul.Tensor %1746, %1756 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1757, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%1758 = torch.aten.div.Tensor %1757, %67 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1758, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%float-2.400000e02_1493 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1494 = torch.constant.float 2.400000e+02 | |
%1759 = torch.aten.clamp %1758, %float-2.400000e02_1493, %float2.400000e02_1494 : !torch.vtensor<[1,?,14336],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %1759, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%int26_1495 = torch.constant.int 26 | |
%1760 = torch.prims.convert_element_type %1759, %int26_1495 : !torch.vtensor<[1,?,14336],bf16>, !torch.int -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1760, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int-2_1496 = torch.constant.int -2 | |
%int-1_1497 = torch.constant.int -1 | |
%1761 = torch.aten.transpose.int %68, %int-2_1496, %int-1_1497 : !torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%int1_1498 = torch.constant.int 1 | |
%1762 = torch.aten.size.int %1744, %int1_1498 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int14336_1499 = torch.constant.int 14336 | |
%1763 = torch.prim.ListConstruct %1762, %int14336_1499 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1764 = torch.aten.view %1760, %1763 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1764, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%1765 = torch.aten.mm %1764, %1761 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.vtensor<[14336,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1765, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_1500 = torch.constant.int 1 | |
%int4096_1501 = torch.constant.int 4096 | |
%1766 = torch.prim.ListConstruct %int1_1500, %1762, %int4096_1501 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1767 = torch.aten.view %1765, %1766 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1767, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_1502 = torch.constant.int 15 | |
%1768 = torch.prims.convert_element_type %1767, %int15_1502 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1768, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_1503 = torch.constant.int 1 | |
%1769 = torch.aten.add.Tensor %1726, %1768, %int1_1503 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1769, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
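// The second residual add closes this transformer block; the next block begins here with its attention
// RMSNorm (weight %69) over %1769.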
%int6_1504 = torch.constant.int 6 | |
%1770 = torch.prims.convert_element_type %1769, %int6_1504 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1770, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_1505 = torch.constant.int 2 | |
%1771 = torch.aten.pow.Tensor_Scalar %1770, %int2_1505 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1771, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_1506 = torch.constant.int -1 | |
%1772 = torch.prim.ListConstruct %int-1_1506 : (!torch.int) -> !torch.list<int> | |
%true_1507 = torch.constant.bool true | |
%none_1508 = torch.constant.none | |
%1773 = torch.aten.mean.dim %1771, %1772, %true_1507, %none_1508 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1773, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_1509 = torch.constant.float 1.000000e-05 | |
%int1_1510 = torch.constant.int 1 | |
%1774 = torch.aten.add.Scalar %1773, %float1.000000e-05_1509, %int1_1510 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1774, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1775 = torch.aten.rsqrt %1774 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %1775, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%1776 = torch.aten.mul.Tensor %1770, %1775 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %1776, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_1511 = torch.constant.int 15 | |
%1777 = torch.prims.convert_element_type %1776, %int15_1511 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1777, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1778 = torch.aten.mul.Tensor %69, %1777 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1778, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
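// Q/K/V projections for the new block: each path divides by its own input scale, clamps to +/-240, casts
// to f8E4M3FNUZ, matmuls against the transposed quantized weight (Q: 4096x4096, K and V: 1024x4096), and
// dequantizes the result back to bf16.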
%1779 = torch.aten.div.Tensor %1778, %70 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1779, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1512 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1513 = torch.constant.float 2.400000e+02 | |
%1780 = torch.aten.clamp %1779, %float-2.400000e02_1512, %float2.400000e02_1513 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1780, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1514 = torch.constant.int 26 | |
%1781 = torch.prims.convert_element_type %1780, %int26_1514 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1781, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1515 = torch.constant.int -2 | |
%int-1_1516 = torch.constant.int -1 | |
%1782 = torch.aten.transpose.int %71, %int-2_1515, %int-1_1516 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_1517 = torch.constant.int 4096 | |
%1783 = torch.prim.ListConstruct %564, %int4096_1517 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1784 = torch.aten.view %1781, %1783 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1784, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1785 = torch.aten.mm %1784, %1782 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1785, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_1518 = torch.constant.int 1 | |
%int4096_1519 = torch.constant.int 4096 | |
%1786 = torch.prim.ListConstruct %int1_1518, %564, %int4096_1519 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1787 = torch.aten.view %1785, %1786 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1787, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_1520 = torch.constant.int 15 | |
%1788 = torch.prims.convert_element_type %1787, %int15_1520 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1788, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%1789 = torch.aten.div.Tensor %1778, %72 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1789, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1521 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1522 = torch.constant.float 2.400000e+02 | |
%1790 = torch.aten.clamp %1789, %float-2.400000e02_1521, %float2.400000e02_1522 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1790, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1523 = torch.constant.int 26 | |
%1791 = torch.prims.convert_element_type %1790, %int26_1523 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1791, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1524 = torch.constant.int -2 | |
%int-1_1525 = torch.constant.int -1 | |
%1792 = torch.aten.transpose.int %73, %int-2_1524, %int-1_1525 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_1526 = torch.constant.int 4096 | |
%1793 = torch.prim.ListConstruct %564, %int4096_1526 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1794 = torch.aten.view %1791, %1793 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1794, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1795 = torch.aten.mm %1794, %1792 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1795, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_1527 = torch.constant.int 1 | |
%int1024_1528 = torch.constant.int 1024 | |
%1796 = torch.prim.ListConstruct %int1_1527, %564, %int1024_1528 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1797 = torch.aten.view %1795, %1796 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1797, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_1529 = torch.constant.int 15 | |
%1798 = torch.prims.convert_element_type %1797, %int15_1529 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %1798, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
%1799 = torch.aten.div.Tensor %1778, %74 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1799, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1530 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1531 = torch.constant.float 2.400000e+02 | |
%1800 = torch.aten.clamp %1799, %float-2.400000e02_1530, %float2.400000e02_1531 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %1800, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1532 = torch.constant.int 26 | |
%1801 = torch.prims.convert_element_type %1800, %int26_1532 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1801, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1533 = torch.constant.int -2 | |
%int-1_1534 = torch.constant.int -1 | |
%1802 = torch.aten.transpose.int %75, %int-2_1533, %int-1_1534 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_1535 = torch.constant.int 4096 | |
%1803 = torch.prim.ListConstruct %564, %int4096_1535 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1804 = torch.aten.view %1801, %1803 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1804, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%1805 = torch.aten.mm %1804, %1802 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1805, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_1536 = torch.constant.int 1 | |
%int1024_1537 = torch.constant.int 1024 | |
%1806 = torch.prim.ListConstruct %int1_1536, %564, %int1024_1537 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1807 = torch.aten.view %1805, %1806 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1807, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_1538 = torch.constant.int 15 | |
%1808 = torch.prims.convert_element_type %1807, %int15_1538 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %1808, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
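// Reshape the projections into per-head layout: Q -> [1, seq, 32, 128], K and V -> [1, seq, 8, 128].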
%int1_1539 = torch.constant.int 1 | |
%int32_1540 = torch.constant.int 32 | |
%int128_1541 = torch.constant.int 128 | |
%1809 = torch.prim.ListConstruct %int1_1539, %564, %int32_1540, %int128_1541 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1810 = torch.aten.view %1788, %1809 : !torch.vtensor<[1,?,4096],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1810, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_1542 = torch.constant.int 1 | |
%int8_1543 = torch.constant.int 8 | |
%int128_1544 = torch.constant.int 128 | |
%1811 = torch.prim.ListConstruct %int1_1542, %564, %int8_1543, %int128_1544 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1812 = torch.aten.view %1798, %1811 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1812, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int1_1545 = torch.constant.int 1 | |
%int8_1546 = torch.constant.int 8 | |
%int128_1547 = torch.constant.int 128 | |
%1813 = torch.prim.ListConstruct %int1_1545, %564, %int8_1546, %int128_1547 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1814 = torch.aten.view %1808, %1813 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1814, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
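// Rotary-embedding tables: inverse frequencies with base 5.0e5 over a 131072-position range, with what
// looks like Llama-3-style frequency scaling (scale factor 8, smooth interpolation between wavelengths
// 2048 and 8192) applied before taking cos and sin and casting to bf16.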
%int131072_1548 = torch.constant.int 131072 | |
%none_1549 = torch.constant.none | |
%none_1550 = torch.constant.none | |
%cpu_1551 = torch.constant.device "cpu" | |
%false_1552 = torch.constant.bool false | |
%1815 = torch.aten.arange %int131072_1548, %none_1549, %none_1550, %cpu_1551, %false_1552 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_1553 = torch.constant.int 0 | |
%int128_1554 = torch.constant.int 128 | |
%int2_1555 = torch.constant.int 2 | |
%int4_1556 = torch.constant.int 4 | |
%none_1557 = torch.constant.none | |
%cpu_1558 = torch.constant.device "cpu" | |
%false_1559 = torch.constant.bool false | |
%1816 = torch.aten.arange.start_step %int0_1553, %int128_1554, %int2_1555, %int4_1556, %none_1557, %cpu_1558, %false_1559 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_1560 = torch.constant.int 6 | |
%1817 = torch.prims.convert_element_type %1816, %int6_1560 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_1561 = torch.constant.int 128 | |
%1818 = torch.aten.div.Scalar %1817, %int128_1561 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_1562 = torch.constant.float 5.000000e+05 | |
%1819 = torch.aten.pow.Scalar %float5.000000e05_1562, %1818 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1820 = torch.aten.reciprocal %1819 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_1563 = torch.constant.float 1.000000e+00 | |
%1821 = torch.aten.mul.Scalar %1820, %float1.000000e00_1563 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%1822 = torch.aten.reciprocal %1821 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_1564 = torch.constant.float 6.2831853071795862 | |
%1823 = torch.aten.mul.Scalar %1822, %float6.283190e00_1564 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_1565 = torch.constant.float 8.192000e+03 | |
%1824 = torch.aten.gt.Scalar %1823, %float8.192000e03_1565 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_1566 = torch.constant.int 8 | |
%1825 = torch.aten.div.Scalar %1821, %int8_1566 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1826 = torch.aten.where.self %1824, %1825, %1821 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1827 = torch.aten.reciprocal %1823 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_1567 = torch.constant.int 8192 | |
%1828 = torch.aten.mul.Scalar %1827, %int8192_1567 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_1568 = torch.constant.int 1 | |
%int1_1569 = torch.constant.int 1 | |
%1829 = torch.aten.sub.Scalar %1828, %int1_1568, %int1_1569 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_1570 = torch.constant.int 3 | |
%1830 = torch.aten.div.Scalar %1829, %int3_1570 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_1571 = torch.constant.int 1 | |
%int1_1572 = torch.constant.int 1 | |
%1831 = torch.aten.rsub.Scalar %1830, %int1_1571, %int1_1572 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%1832 = torch.aten.mul.Tensor %1831, %1826 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_1573 = torch.constant.int 8 | |
%1833 = torch.aten.div.Scalar %1832, %int8_1573 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1834 = torch.aten.mul.Tensor %1830, %1826 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_1574 = torch.constant.int 1 | |
%1835 = torch.aten.add.Tensor %1833, %1834, %int1_1574 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_1575 = torch.constant.float 2.048000e+03 | |
%1836 = torch.aten.lt.Scalar %1823, %float2.048000e03_1575 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1837 = torch.aten.bitwise_not %1836 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_1576 = torch.constant.float 8.192000e+03 | |
%1838 = torch.aten.gt.Scalar %1823, %float8.192000e03_1576 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1839 = torch.aten.bitwise_not %1838 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1840 = torch.aten.mul.Tensor %1837, %1839 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1841 = torch.aten.where.self %1840, %1835, %1826 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1842 = torch.prim.ListConstruct %1841, %1841 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_1577 = torch.constant.int -1 | |
%1843 = torch.aten.cat %1842, %int-1_1577 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_1578 = torch.constant.int 6 | |
%1844 = torch.prims.convert_element_type %1815, %int6_1578 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_1579 = torch.constant.int 131072 | |
%int1_1580 = torch.constant.int 1 | |
%1845 = torch.prim.ListConstruct %int131072_1579, %int1_1580 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1846 = torch.aten.view %1844, %1845 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%1847 = torch.aten.mul.Tensor %1846, %1843 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%1848 = torch.aten.cos %1847 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_1581 = torch.constant.int 15 | |
%1849 = torch.prims.convert_element_type %1848, %int15_1581 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%1850 = torch.aten.sin %1847 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_1582 = torch.constant.int 15 | |
%1851 = torch.prims.convert_element_type %1850, %int15_1582 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
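// Slice the cos/sin tables to the current sequence length (%1852) and broadcast them to [1, seq, 1, 128].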
%int1_1583 = torch.constant.int 1 | |
%1852 = torch.aten.size.int %1787, %int1_1583 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_1584 = torch.constant.int 0 | |
%1853 = torch.aten.add.int %int0_1584, %1852 : !torch.int, !torch.int -> !torch.int | |
%int0_1585 = torch.constant.int 0 | |
%int0_1586 = torch.constant.int 0 | |
%int1_1587 = torch.constant.int 1 | |
%1854 = torch.aten.slice.Tensor %1849, %int0_1585, %int0_1586, %1853, %int1_1587 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1854, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_1588 = torch.constant.int 1 | |
%int0_1589 = torch.constant.int 0 | |
%int9223372036854775807_1590 = torch.constant.int 9223372036854775807 | |
%int1_1591 = torch.constant.int 1 | |
%1855 = torch.aten.slice.Tensor %1854, %int1_1588, %int0_1589, %int9223372036854775807_1590, %int1_1591 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1855, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_1592 = torch.constant.int 0 | |
%1856 = torch.aten.add.int %int0_1592, %1852 : !torch.int, !torch.int -> !torch.int | |
%int0_1593 = torch.constant.int 0 | |
%int0_1594 = torch.constant.int 0 | |
%int1_1595 = torch.constant.int 1 | |
%1857 = torch.aten.slice.Tensor %1851, %int0_1593, %int0_1594, %1856, %int1_1595 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1857, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_1596 = torch.constant.int 1 | |
%int0_1597 = torch.constant.int 0 | |
%int9223372036854775807_1598 = torch.constant.int 9223372036854775807 | |
%int1_1599 = torch.constant.int 1 | |
%1858 = torch.aten.slice.Tensor %1857, %int1_1596, %int0_1597, %int9223372036854775807_1598, %int1_1599 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1858, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_1600 = torch.constant.int 0 | |
%1859 = torch.aten.unsqueeze %1855, %int0_1600 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1859, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_1601 = torch.constant.int 1 | |
%int0_1602 = torch.constant.int 0 | |
%int9223372036854775807_1603 = torch.constant.int 9223372036854775807 | |
%int1_1604 = torch.constant.int 1 | |
%1860 = torch.aten.slice.Tensor %1859, %int1_1601, %int0_1602, %int9223372036854775807_1603, %int1_1604 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1860, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_1605 = torch.constant.int 2 | |
%1861 = torch.aten.unsqueeze %1860, %int2_1605 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1861, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_1606 = torch.constant.int 3 | |
%int0_1607 = torch.constant.int 0 | |
%int9223372036854775807_1608 = torch.constant.int 9223372036854775807 | |
%int1_1609 = torch.constant.int 1 | |
%1862 = torch.aten.slice.Tensor %1861, %int3_1606, %int0_1607, %int9223372036854775807_1608, %int1_1609 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1862, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_1610 = torch.constant.int 0 | |
%1863 = torch.aten.unsqueeze %1858, %int0_1610 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1863, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_1611 = torch.constant.int 1 | |
%int0_1612 = torch.constant.int 0 | |
%int9223372036854775807_1613 = torch.constant.int 9223372036854775807 | |
%int1_1614 = torch.constant.int 1 | |
%1864 = torch.aten.slice.Tensor %1863, %int1_1611, %int0_1612, %int9223372036854775807_1613, %int1_1614 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1864, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_1615 = torch.constant.int 2 | |
%1865 = torch.aten.unsqueeze %1864, %int2_1615 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1865, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_1616 = torch.constant.int 3 | |
%int0_1617 = torch.constant.int 0 | |
%int9223372036854775807_1618 = torch.constant.int 9223372036854775807 | |
%int1_1619 = torch.constant.int 1 | |
%1866 = torch.aten.slice.Tensor %1865, %int3_1616, %int0_1617, %int9223372036854775807_1618, %int1_1619 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1866, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
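// Apply RoPE to Q using the rotate-half form: q * cos + rotate_half(q) * sin, where rotate_half
// concatenates (-q[..., 64:], q[..., :64]).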
%int1_1620 = torch.constant.int 1 | |
%int2_1621 = torch.constant.int 2 | |
%1867 = torch.aten.transpose.int %1862, %int1_1620, %int2_1621 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1867, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1622 = torch.constant.int 1 | |
%int1_1623 = torch.constant.int 1 | |
%int1_1624 = torch.constant.int 1 | |
%int1_1625 = torch.constant.int 1 | |
%1868 = torch.prim.ListConstruct %int1_1622, %int1_1623, %int1_1624, %int1_1625 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1869 = torch.aten.repeat %1867, %1868 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1869, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1626 = torch.constant.int 1 | |
%int2_1627 = torch.constant.int 2 | |
%1870 = torch.aten.transpose.int %1866, %int1_1626, %int2_1627 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1870, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1628 = torch.constant.int 1 | |
%int2_1629 = torch.constant.int 2 | |
%1871 = torch.aten.transpose.int %1810, %int1_1628, %int2_1629 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1871, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1630 = torch.constant.int 1 | |
%int1_1631 = torch.constant.int 1 | |
%int1_1632 = torch.constant.int 1 | |
%int1_1633 = torch.constant.int 1 | |
%1872 = torch.prim.ListConstruct %int1_1630, %int1_1631, %int1_1632, %int1_1633 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1873 = torch.aten.repeat %1870, %1872 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1873, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%1874 = torch.aten.mul.Tensor %1871, %1869 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1874, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int3_1634 = torch.constant.int 3 | |
%int0_1635 = torch.constant.int 0 | |
%int64_1636 = torch.constant.int 64 | |
%int1_1637 = torch.constant.int 1 | |
%1875 = torch.aten.slice.Tensor %1871, %int3_1634, %int0_1635, %int64_1636, %int1_1637 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %1875, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%int3_1638 = torch.constant.int 3 | |
%int64_1639 = torch.constant.int 64 | |
%int9223372036854775807_1640 = torch.constant.int 9223372036854775807 | |
%int1_1641 = torch.constant.int 1 | |
%1876 = torch.aten.slice.Tensor %1871, %int3_1638, %int64_1639, %int9223372036854775807_1640, %int1_1641 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %1876, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%1877 = torch.aten.neg %1876 : !torch.vtensor<[1,32,?,64],bf16> -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %1877, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%1878 = torch.prim.ListConstruct %1877, %1875 : (!torch.vtensor<[1,32,?,64],bf16>, !torch.vtensor<[1,32,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_1642 = torch.constant.int -1 | |
%1879 = torch.aten.cat %1878, %int-1_1642 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1879, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%1880 = torch.aten.mul.Tensor %1879, %1873 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1880, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1643 = torch.constant.int 1 | |
%1881 = torch.aten.add.Tensor %1874, %1880, %int1_1643 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %1881, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1644 = torch.constant.int 1 | |
%int2_1645 = torch.constant.int 2 | |
%1882 = torch.aten.transpose.int %1881, %int1_1644, %int2_1645 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %1882, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
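// Below, the RoPE inverse-frequency and cos/sin tables are rebuilt for the key path. The constants suggest Llama-3-style frequency scaling: theta = 5.0e5, scale factor 8, and wavelength thresholds 2048 / 8192 for the smoothed interpolation.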
%int131072_1646 = torch.constant.int 131072 | |
%none_1647 = torch.constant.none | |
%none_1648 = torch.constant.none | |
%cpu_1649 = torch.constant.device "cpu" | |
%false_1650 = torch.constant.bool false | |
%1883 = torch.aten.arange %int131072_1646, %none_1647, %none_1648, %cpu_1649, %false_1650 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_1651 = torch.constant.int 0 | |
%int128_1652 = torch.constant.int 128 | |
%int2_1653 = torch.constant.int 2 | |
%int4_1654 = torch.constant.int 4 | |
%none_1655 = torch.constant.none | |
%cpu_1656 = torch.constant.device "cpu" | |
%false_1657 = torch.constant.bool false | |
%1884 = torch.aten.arange.start_step %int0_1651, %int128_1652, %int2_1653, %int4_1654, %none_1655, %cpu_1656, %false_1657 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_1658 = torch.constant.int 6 | |
%1885 = torch.prims.convert_element_type %1884, %int6_1658 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_1659 = torch.constant.int 128 | |
%1886 = torch.aten.div.Scalar %1885, %int128_1659 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_1660 = torch.constant.float 5.000000e+05 | |
%1887 = torch.aten.pow.Scalar %float5.000000e05_1660, %1886 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1888 = torch.aten.reciprocal %1887 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_1661 = torch.constant.float 1.000000e+00 | |
%1889 = torch.aten.mul.Scalar %1888, %float1.000000e00_1661 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%1890 = torch.aten.reciprocal %1889 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_1662 = torch.constant.float 6.2831853071795862 | |
%1891 = torch.aten.mul.Scalar %1890, %float6.283190e00_1662 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_1663 = torch.constant.float 8.192000e+03 | |
%1892 = torch.aten.gt.Scalar %1891, %float8.192000e03_1663 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_1664 = torch.constant.int 8 | |
%1893 = torch.aten.div.Scalar %1889, %int8_1664 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1894 = torch.aten.where.self %1892, %1893, %1889 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1895 = torch.aten.reciprocal %1891 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_1665 = torch.constant.int 8192 | |
%1896 = torch.aten.mul.Scalar %1895, %int8192_1665 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_1666 = torch.constant.int 1 | |
%int1_1667 = torch.constant.int 1 | |
%1897 = torch.aten.sub.Scalar %1896, %int1_1666, %int1_1667 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_1668 = torch.constant.int 3 | |
%1898 = torch.aten.div.Scalar %1897, %int3_1668 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_1669 = torch.constant.int 1 | |
%int1_1670 = torch.constant.int 1 | |
%1899 = torch.aten.rsub.Scalar %1898, %int1_1669, %int1_1670 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%1900 = torch.aten.mul.Tensor %1899, %1894 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_1671 = torch.constant.int 8 | |
%1901 = torch.aten.div.Scalar %1900, %int8_1671 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%1902 = torch.aten.mul.Tensor %1898, %1894 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_1672 = torch.constant.int 1 | |
%1903 = torch.aten.add.Tensor %1901, %1902, %int1_1672 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_1673 = torch.constant.float 2.048000e+03 | |
%1904 = torch.aten.lt.Scalar %1891, %float2.048000e03_1673 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1905 = torch.aten.bitwise_not %1904 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_1674 = torch.constant.float 8.192000e+03 | |
%1906 = torch.aten.gt.Scalar %1891, %float8.192000e03_1674 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%1907 = torch.aten.bitwise_not %1906 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1908 = torch.aten.mul.Tensor %1905, %1907 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%1909 = torch.aten.where.self %1908, %1903, %1894 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%1910 = torch.prim.ListConstruct %1909, %1909 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_1675 = torch.constant.int -1 | |
%1911 = torch.aten.cat %1910, %int-1_1675 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_1676 = torch.constant.int 6 | |
%1912 = torch.prims.convert_element_type %1883, %int6_1676 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_1677 = torch.constant.int 131072 | |
%int1_1678 = torch.constant.int 1 | |
%1913 = torch.prim.ListConstruct %int131072_1677, %int1_1678 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1914 = torch.aten.view %1912, %1913 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%1915 = torch.aten.mul.Tensor %1914, %1911 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%1916 = torch.aten.cos %1915 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_1679 = torch.constant.int 15 | |
%1917 = torch.prims.convert_element_type %1916, %int15_1679 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%1918 = torch.aten.sin %1915 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_1680 = torch.constant.int 15 | |
%1919 = torch.prims.convert_element_type %1918, %int15_1680 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
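// The [131072,128] bf16 cos/sin tables are then sliced to the current sequence length and reshaped to [1, seq, 1, 128] so they broadcast over the key heads.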
%int1_1681 = torch.constant.int 1 | |
%1920 = torch.aten.size.int %1797, %int1_1681 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_1682 = torch.constant.int 0 | |
%1921 = torch.aten.add.int %int0_1682, %1920 : !torch.int, !torch.int -> !torch.int | |
%int0_1683 = torch.constant.int 0 | |
%int0_1684 = torch.constant.int 0 | |
%int1_1685 = torch.constant.int 1 | |
%1922 = torch.aten.slice.Tensor %1917, %int0_1683, %int0_1684, %1921, %int1_1685 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1922, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_1686 = torch.constant.int 1 | |
%int0_1687 = torch.constant.int 0 | |
%int9223372036854775807_1688 = torch.constant.int 9223372036854775807 | |
%int1_1689 = torch.constant.int 1 | |
%1923 = torch.aten.slice.Tensor %1922, %int1_1686, %int0_1687, %int9223372036854775807_1688, %int1_1689 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1923, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_1690 = torch.constant.int 0 | |
%1924 = torch.aten.add.int %int0_1690, %1920 : !torch.int, !torch.int -> !torch.int | |
%int0_1691 = torch.constant.int 0 | |
%int0_1692 = torch.constant.int 0 | |
%int1_1693 = torch.constant.int 1 | |
%1925 = torch.aten.slice.Tensor %1919, %int0_1691, %int0_1692, %1924, %int1_1693 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1925, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_1694 = torch.constant.int 1 | |
%int0_1695 = torch.constant.int 0 | |
%int9223372036854775807_1696 = torch.constant.int 9223372036854775807 | |
%int1_1697 = torch.constant.int 1 | |
%1926 = torch.aten.slice.Tensor %1925, %int1_1694, %int0_1695, %int9223372036854775807_1696, %int1_1697 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %1926, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_1698 = torch.constant.int 0 | |
%1927 = torch.aten.unsqueeze %1923, %int0_1698 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1927, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_1699 = torch.constant.int 1 | |
%int0_1700 = torch.constant.int 0 | |
%int9223372036854775807_1701 = torch.constant.int 9223372036854775807 | |
%int1_1702 = torch.constant.int 1 | |
%1928 = torch.aten.slice.Tensor %1927, %int1_1699, %int0_1700, %int9223372036854775807_1701, %int1_1702 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1928, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_1703 = torch.constant.int 2 | |
%1929 = torch.aten.unsqueeze %1928, %int2_1703 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1929, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_1704 = torch.constant.int 3 | |
%int0_1705 = torch.constant.int 0 | |
%int9223372036854775807_1706 = torch.constant.int 9223372036854775807 | |
%int1_1707 = torch.constant.int 1 | |
%1930 = torch.aten.slice.Tensor %1929, %int3_1704, %int0_1705, %int9223372036854775807_1706, %int1_1707 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1930, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_1708 = torch.constant.int 0 | |
%1931 = torch.aten.unsqueeze %1926, %int0_1708 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1931, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_1709 = torch.constant.int 1 | |
%int0_1710 = torch.constant.int 0 | |
%int9223372036854775807_1711 = torch.constant.int 9223372036854775807 | |
%int1_1712 = torch.constant.int 1 | |
%1932 = torch.aten.slice.Tensor %1931, %int1_1709, %int0_1710, %int9223372036854775807_1711, %int1_1712 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %1932, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_1713 = torch.constant.int 2 | |
%1933 = torch.aten.unsqueeze %1932, %int2_1713 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1933, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_1714 = torch.constant.int 3 | |
%int0_1715 = torch.constant.int 0 | |
%int9223372036854775807_1716 = torch.constant.int 9223372036854775807 | |
%int1_1717 = torch.constant.int 1 | |
%1934 = torch.aten.slice.Tensor %1933, %int3_1714, %int0_1715, %int9223372036854775807_1716, %int1_1717 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %1934, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_1718 = torch.constant.int 1 | |
%int2_1719 = torch.constant.int 2 | |
%1935 = torch.aten.transpose.int %1930, %int1_1718, %int2_1719 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1935, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1720 = torch.constant.int 1 | |
%int1_1721 = torch.constant.int 1 | |
%int1_1722 = torch.constant.int 1 | |
%int1_1723 = torch.constant.int 1 | |
%1936 = torch.prim.ListConstruct %int1_1720, %int1_1721, %int1_1722, %int1_1723 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1937 = torch.aten.repeat %1935, %1936 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1937, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_1724 = torch.constant.int 1 | |
%int2_1725 = torch.constant.int 2 | |
%1938 = torch.aten.transpose.int %1934, %int1_1724, %int2_1725 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1938, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
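// The same rotate-half RoPE formulation is now applied to the 8-head (grouped-query) key tensor.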
%int1_1726 = torch.constant.int 1 | |
%int2_1727 = torch.constant.int 2 | |
%1939 = torch.aten.transpose.int %1812, %int1_1726, %int2_1727 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1939, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_1728 = torch.constant.int 1 | |
%int1_1729 = torch.constant.int 1 | |
%int1_1730 = torch.constant.int 1 | |
%int1_1731 = torch.constant.int 1 | |
%1940 = torch.prim.ListConstruct %int1_1728, %int1_1729, %int1_1730, %int1_1731 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1941 = torch.aten.repeat %1938, %1940 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %1941, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%1942 = torch.aten.mul.Tensor %1939, %1937 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1942, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int3_1732 = torch.constant.int 3 | |
%int0_1733 = torch.constant.int 0 | |
%int64_1734 = torch.constant.int 64 | |
%int1_1735 = torch.constant.int 1 | |
%1943 = torch.aten.slice.Tensor %1939, %int3_1732, %int0_1733, %int64_1734, %int1_1735 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1943, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%int3_1736 = torch.constant.int 3 | |
%int64_1737 = torch.constant.int 64 | |
%int9223372036854775807_1738 = torch.constant.int 9223372036854775807 | |
%int1_1739 = torch.constant.int 1 | |
%1944 = torch.aten.slice.Tensor %1939, %int3_1736, %int64_1737, %int9223372036854775807_1738, %int1_1739 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1944, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%1945 = torch.aten.neg %1944 : !torch.vtensor<[1,8,?,64],bf16> -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %1945, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%1946 = torch.prim.ListConstruct %1945, %1943 : (!torch.vtensor<[1,8,?,64],bf16>, !torch.vtensor<[1,8,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_1740 = torch.constant.int -1 | |
%1947 = torch.aten.cat %1946, %int-1_1740 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1947, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%1948 = torch.aten.mul.Tensor %1947, %1941 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1948, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_1741 = torch.constant.int 1 | |
%1949 = torch.aten.add.Tensor %1942, %1948, %int1_1741 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,8,?,128],bf16>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %1949, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_1742 = torch.constant.int 1 | |
%int2_1743 = torch.constant.int 2 | |
%1950 = torch.aten.transpose.int %1949, %int1_1742, %int2_1743 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1950, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
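// K and V appear to be fake-quantized for the KV cache: divided by a quantizer scale (%76, presumably this block's kv_cache rscale), clamped to +/-240 (the f8E4M3FNUZ finite range), and cast to f8E4M3FNUZ.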
%1951 = torch.aten.div.Tensor %1950, %76 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1951, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_1744 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1745 = torch.constant.float 2.400000e+02 | |
%1952 = torch.aten.clamp %1951, %float-2.400000e02_1744, %float2.400000e02_1745 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1952, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_1746 = torch.constant.int 26 | |
%1953 = torch.prims.convert_element_type %1952, %int26_1746 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1953, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
%1954 = torch.aten.div.Tensor %1814, %76 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1954, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_1747 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1748 = torch.constant.float 2.400000e+02 | |
%1955 = torch.aten.clamp %1954, %float-2.400000e02_1747, %float2.400000e02_1748 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %1955, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_1749 = torch.constant.int 26 | |
%1956 = torch.prims.convert_element_type %1955, %int26_1749 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1956, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
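// Cache slot indices are computed as %arg2 * 64 + 8; with each cache page viewed as [32, 2, 32, 8, 128] and the leading dims flattened, the +8 offset likely selects this block's K slot (two slots per transformer block, K then V). The quantized K tile is then scattered into the paged cache with index_put.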
%int64_1750 = torch.constant.int 64 | |
%1957 = torch.aten.mul.Scalar %arg2, %int64_1750 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1957, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int8_1751 = torch.constant.int 8 | |
%int1_1752 = torch.constant.int 1 | |
%1958 = torch.aten.add.Scalar %1957, %int8_1751, %int1_1752 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1958, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int1_1753 = torch.constant.int 1 | |
%int32_1754 = torch.constant.int 32 | |
%int8_1755 = torch.constant.int 8 | |
%int128_1756 = torch.constant.int 128 | |
%1959 = torch.prim.ListConstruct %int1_1753, %748, %int32_1754, %int8_1755, %int128_1756 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1960 = torch.aten.view %1953, %1959 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1960, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_1757 = torch.constant.int 32 | |
%int8_1758 = torch.constant.int 8 | |
%int128_1759 = torch.constant.int 128 | |
%1961 = torch.prim.ListConstruct %748, %int32_1757, %int8_1758, %int128_1759 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1962 = torch.aten.view %1960, %1961 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1962, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%1963 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%1964 = torch.aten.view %1958, %1963 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %1964, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%int32_1760 = torch.constant.int 32 | |
%int2_1761 = torch.constant.int 2 | |
%int32_1762 = torch.constant.int 32 | |
%int8_1763 = torch.constant.int 8 | |
%int128_1764 = torch.constant.int 128 | |
%1965 = torch.prim.ListConstruct %739, %int32_1760, %int2_1761, %int32_1762, %int8_1763, %int128_1764 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1966 = torch.aten.view %1689, %1965 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1966, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_1765 = torch.constant.int 32 | |
%1967 = torch.aten.mul.int %739, %int32_1765 : !torch.int, !torch.int -> !torch.int | |
%int2_1766 = torch.constant.int 2 | |
%1968 = torch.aten.mul.int %1967, %int2_1766 : !torch.int, !torch.int -> !torch.int | |
%int32_1767 = torch.constant.int 32 | |
%int8_1768 = torch.constant.int 8 | |
%int128_1769 = torch.constant.int 128 | |
%1969 = torch.prim.ListConstruct %1968, %int32_1767, %int8_1768, %int128_1769 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1970 = torch.aten.view %1966, %1969 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1970, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%1971 = torch.prim.ListConstruct %1964 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_1770 = torch.constant.bool false | |
%1972 = torch.aten.index_put %1970, %1971, %1962, %false_1770 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1972, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_1771 = torch.constant.int 32 | |
%int2_1772 = torch.constant.int 2 | |
%int32_1773 = torch.constant.int 32 | |
%int8_1774 = torch.constant.int 8 | |
%int128_1775 = torch.constant.int 128 | |
%1973 = torch.prim.ListConstruct %739, %int32_1771, %int2_1772, %int32_1773, %int8_1774, %int128_1775 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1974 = torch.aten.view %1972, %1973 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1974, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_1776 = torch.constant.int 2097152 | |
%1975 = torch.prim.ListConstruct %739, %int2097152_1776 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1976 = torch.aten.view %1974, %1975 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1976, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
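// The same index_put pattern writes the quantized V tile, using the same indices offset by one additional slot (+1).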
%int32_1777 = torch.constant.int 32 | |
%int2_1778 = torch.constant.int 2 | |
%int32_1779 = torch.constant.int 32 | |
%int8_1780 = torch.constant.int 8 | |
%int128_1781 = torch.constant.int 128 | |
%1977 = torch.prim.ListConstruct %739, %int32_1777, %int2_1778, %int32_1779, %int8_1780, %int128_1781 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1978 = torch.aten.view %1976, %1977 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1978, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_1782 = torch.constant.int 32 | |
%int8_1783 = torch.constant.int 8 | |
%int128_1784 = torch.constant.int 128 | |
%1979 = torch.prim.ListConstruct %1968, %int32_1782, %int8_1783, %int128_1784 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1980 = torch.aten.view %1978, %1979 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1980, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_1785 = torch.constant.int 1 | |
%int32_1786 = torch.constant.int 32 | |
%int8_1787 = torch.constant.int 8 | |
%int128_1788 = torch.constant.int 128 | |
%1981 = torch.prim.ListConstruct %int1_1785, %748, %int32_1786, %int8_1787, %int128_1788 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1982 = torch.aten.view %1956, %1981 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1982, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_1789 = torch.constant.int 32 | |
%int8_1790 = torch.constant.int 8 | |
%int128_1791 = torch.constant.int 128 | |
%1983 = torch.prim.ListConstruct %748, %int32_1789, %int8_1790, %int128_1791 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1984 = torch.aten.view %1982, %1983 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1984, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_1792 = torch.constant.int 1 | |
%int1_1793 = torch.constant.int 1 | |
%1985 = torch.aten.add.Scalar %1958, %int1_1792, %int1_1793 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %1985, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%1986 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%1987 = torch.aten.view %1985, %1986 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %1987, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%1988 = torch.prim.ListConstruct %1987 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_1794 = torch.constant.bool false | |
%1989 = torch.aten.index_put %1980, %1988, %1984, %false_1794 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1989, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_1795 = torch.constant.int 32 | |
%int2_1796 = torch.constant.int 2 | |
%int32_1797 = torch.constant.int 32 | |
%int8_1798 = torch.constant.int 8 | |
%int128_1799 = torch.constant.int 128 | |
%1990 = torch.prim.ListConstruct %739, %int32_1795, %int2_1796, %int32_1797, %int8_1798, %int128_1799 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1991 = torch.aten.view %1989, %1990 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1991, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_1800 = torch.constant.int 2097152 | |
%1992 = torch.prim.ListConstruct %739, %int2097152_1800 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1993 = torch.aten.view %1991, %1992 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1993, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
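// Grouped-query attention expansion: the 8 KV heads are unsqueezed, expanded by a factor of 4, and reshaped to 32 heads so they line up with the query heads.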
%int-2_1801 = torch.constant.int -2 | |
%1994 = torch.aten.unsqueeze %1953, %int-2_1801 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1994, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_1802 = torch.constant.int 1 | |
%int8_1803 = torch.constant.int 8 | |
%int4_1804 = torch.constant.int 4 | |
%int128_1805 = torch.constant.int 128 | |
%1995 = torch.prim.ListConstruct %int1_1802, %1920, %int8_1803, %int4_1804, %int128_1805 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_1806 = torch.constant.bool false | |
%1996 = torch.aten.expand %1994, %1995, %false_1806 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1996, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_1807 = torch.constant.int 0 | |
%1997 = torch.aten.clone %1996, %int0_1807 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1997, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_1808 = torch.constant.int 1 | |
%int32_1809 = torch.constant.int 32 | |
%int128_1810 = torch.constant.int 128 | |
%1998 = torch.prim.ListConstruct %int1_1808, %1920, %int32_1809, %int128_1810 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1999 = torch.aten._unsafe_view %1997, %1998 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %1999, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
%int-2_1811 = torch.constant.int -2 | |
%2000 = torch.aten.unsqueeze %1956, %int-2_1811 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2000, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_1812 = torch.constant.int 1 | |
%2001 = torch.aten.size.int %1807, %int1_1812 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int1_1813 = torch.constant.int 1 | |
%int8_1814 = torch.constant.int 8 | |
%int4_1815 = torch.constant.int 4 | |
%int128_1816 = torch.constant.int 128 | |
%2002 = torch.prim.ListConstruct %int1_1813, %2001, %int8_1814, %int4_1815, %int128_1816 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_1817 = torch.constant.bool false | |
%2003 = torch.aten.expand %2000, %2002, %false_1817 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2003, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_1818 = torch.constant.int 0 | |
%2004 = torch.aten.clone %2003, %int0_1818 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2004, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_1819 = torch.constant.int 1 | |
%int32_1820 = torch.constant.int 32 | |
%int128_1821 = torch.constant.int 128 | |
%2005 = torch.prim.ListConstruct %int1_1819, %2001, %int32_1820, %int128_1821 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2006 = torch.aten._unsafe_view %2004, %2005 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2006, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
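// K and V are then dequantized for attention: cast to f32, rescaled by the same quantizer scale (%76), and converted to bf16.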
%int6_1822 = torch.constant.int 6 | |
%2007 = torch.prims.convert_element_type %1999, %int6_1822 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2007, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%2008 = torch.aten.mul.Tensor %2007, %76 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2008, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_1823 = torch.constant.int 15 | |
%2009 = torch.prims.convert_element_type %2008, %int15_1823 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2009, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int6_1824 = torch.constant.int 6 | |
%2010 = torch.prims.convert_element_type %2006, %int6_1824 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2010, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%2011 = torch.aten.mul.Tensor %2010, %76 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2011, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_1825 = torch.constant.int 15 | |
%2012 = torch.prims.convert_element_type %2011, %int15_1825 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2012, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_1826 = torch.constant.int 1 | |
%int2_1827 = torch.constant.int 2 | |
%2013 = torch.aten.transpose.int %1882, %int1_1826, %int2_1827 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2013, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1828 = torch.constant.int 1 | |
%int2_1829 = torch.constant.int 2 | |
%2014 = torch.aten.transpose.int %2009, %int1_1828, %int2_1829 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2014, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1830 = torch.constant.int 1 | |
%int2_1831 = torch.constant.int 2 | |
%2015 = torch.aten.transpose.int %2012, %int1_1830, %int2_1831 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2015, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
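// Causal scaled-dot-product (flash) attention over the bf16 Q/K/V; the boolean operand true appears to be is_causal, with dropout 0.0 and no explicit mask or scale.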
%float0.000000e00_1832 = torch.constant.float 0.000000e+00 | |
%true_1833 = torch.constant.bool true | |
%none_1834 = torch.constant.none | |
%none_1835 = torch.constant.none | |
%2016:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%2013, %2014, %2015, %float0.000000e00_1832, %true_1833, %none_1834, %none_1835) : (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?],f32>) | |
torch.bind_symbolic_shape %2016#0, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_1836 = torch.constant.int 1 | |
%int2_1837 = torch.constant.int 2 | |
%2017 = torch.aten.transpose.int %2016#0, %int1_1836, %int2_1837 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2017, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_1838 = torch.constant.int 1 | |
%int4096_1839 = torch.constant.int 4096 | |
%2018 = torch.prim.ListConstruct %int1_1838, %1852, %int4096_1839 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2019 = torch.aten.view %2017, %2018 : !torch.vtensor<[1,?,32,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2019, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
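// Attention-output projection: the [1, seq, 4096] result is fake-quantized to f8E4M3FNUZ (divide by %77, clamp to +/-240), multiplied by the transposed attn_output weight (%78), converted back to bf16, and added to the residual stream (%1769).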
%2020 = torch.aten.div.Tensor %2019, %77 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2020, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1840 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1841 = torch.constant.float 2.400000e+02 | |
%2021 = torch.aten.clamp %2020, %float-2.400000e02_1840, %float2.400000e02_1841 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2021, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1842 = torch.constant.int 26 | |
%2022 = torch.prims.convert_element_type %2021, %int26_1842 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2022, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1843 = torch.constant.int -2 | |
%int-1_1844 = torch.constant.int -1 | |
%2023 = torch.aten.transpose.int %78, %int-2_1843, %int-1_1844 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_1845 = torch.constant.int 4096 | |
%2024 = torch.prim.ListConstruct %1852, %int4096_1845 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2025 = torch.aten.view %2022, %2024 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2025, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2026 = torch.aten.mm %2025, %2023 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2026, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_1846 = torch.constant.int 1 | |
%int4096_1847 = torch.constant.int 4096 | |
%2027 = torch.prim.ListConstruct %int1_1846, %1852, %int4096_1847 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2028 = torch.aten.view %2026, %2027 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2028, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_1848 = torch.constant.int 15 | |
%2029 = torch.prims.convert_element_type %2028, %int15_1848 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2029, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_1849 = torch.constant.int 1 | |
%2030 = torch.aten.add.Tensor %1769, %2029, %int1_1849 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2030, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
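// RMSNorm with eps 1e-5, presumably this block's ffn_norm: mean of squares over the hidden dim, rsqrt, then scale by the norm weight (%79).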
%int6_1850 = torch.constant.int 6 | |
%2031 = torch.prims.convert_element_type %2030, %int6_1850 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2031, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_1851 = torch.constant.int 2 | |
%2032 = torch.aten.pow.Tensor_Scalar %2031, %int2_1851 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2032, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_1852 = torch.constant.int -1 | |
%2033 = torch.prim.ListConstruct %int-1_1852 : (!torch.int) -> !torch.list<int> | |
%true_1853 = torch.constant.bool true | |
%none_1854 = torch.constant.none | |
%2034 = torch.aten.mean.dim %2032, %2033, %true_1853, %none_1854 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2034, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_1855 = torch.constant.float 1.000000e-05 | |
%int1_1856 = torch.constant.int 1 | |
%2035 = torch.aten.add.Scalar %2034, %float1.000000e-05_1855, %int1_1856 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2035, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2036 = torch.aten.rsqrt %2035 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2036, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2037 = torch.aten.mul.Tensor %2031, %2036 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2037, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_1857 = torch.constant.int 15 | |
%2038 = torch.prims.convert_element_type %2037, %int15_1857 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2038, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2039 = torch.aten.mul.Tensor %79, %2038 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2039, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
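// SwiGLU feed-forward: the gate (%81) and up (%83) projections each go through the same fake-quant -> f8 matmul -> bf16 path, silu(gate) is multiplied by up, and the down projection (%85) maps 14336 back to 4096 before the residual add.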
%2040 = torch.aten.div.Tensor %2039, %80 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2040, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1858 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1859 = torch.constant.float 2.400000e+02 | |
%2041 = torch.aten.clamp %2040, %float-2.400000e02_1858, %float2.400000e02_1859 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2041, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1860 = torch.constant.int 26 | |
%2042 = torch.prims.convert_element_type %2041, %int26_1860 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2042, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1861 = torch.constant.int -2 | |
%int-1_1862 = torch.constant.int -1 | |
%2043 = torch.aten.transpose.int %81, %int-2_1861, %int-1_1862 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_1863 = torch.constant.int 4096 | |
%2044 = torch.prim.ListConstruct %564, %int4096_1863 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2045 = torch.aten.view %2042, %2044 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2045, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2046 = torch.aten.mm %2045, %2043 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2046, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_1864 = torch.constant.int 1 | |
%int14336_1865 = torch.constant.int 14336 | |
%2047 = torch.prim.ListConstruct %int1_1864, %564, %int14336_1865 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2048 = torch.aten.view %2046, %2047 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2048, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_1866 = torch.constant.int 15 | |
%2049 = torch.prims.convert_element_type %2048, %int15_1866 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2049, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%2050 = torch.aten.silu %2049 : !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2050, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%2051 = torch.aten.div.Tensor %2039, %82 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2051, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1867 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1868 = torch.constant.float 2.400000e+02 | |
%2052 = torch.aten.clamp %2051, %float-2.400000e02_1867, %float2.400000e02_1868 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2052, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1869 = torch.constant.int 26 | |
%2053 = torch.prims.convert_element_type %2052, %int26_1869 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2053, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1870 = torch.constant.int -2 | |
%int-1_1871 = torch.constant.int -1 | |
%2054 = torch.aten.transpose.int %83, %int-2_1870, %int-1_1871 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_1872 = torch.constant.int 4096 | |
%2055 = torch.prim.ListConstruct %564, %int4096_1872 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2056 = torch.aten.view %2053, %2055 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2056, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2057 = torch.aten.mm %2056, %2054 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2057, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_1873 = torch.constant.int 1 | |
%int14336_1874 = torch.constant.int 14336 | |
%2058 = torch.prim.ListConstruct %int1_1873, %564, %int14336_1874 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2059 = torch.aten.view %2057, %2058 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2059, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_1875 = torch.constant.int 15 | |
%2060 = torch.prims.convert_element_type %2059, %int15_1875 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2060, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%2061 = torch.aten.mul.Tensor %2050, %2060 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2061, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%2062 = torch.aten.div.Tensor %2061, %84 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2062, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%float-2.400000e02_1876 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1877 = torch.constant.float 2.400000e+02 | |
%2063 = torch.aten.clamp %2062, %float-2.400000e02_1876, %float2.400000e02_1877 : !torch.vtensor<[1,?,14336],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2063, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%int26_1878 = torch.constant.int 26 | |
%2064 = torch.prims.convert_element_type %2063, %int26_1878 : !torch.vtensor<[1,?,14336],bf16>, !torch.int -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2064, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int-2_1879 = torch.constant.int -2 | |
%int-1_1880 = torch.constant.int -1 | |
%2065 = torch.aten.transpose.int %85, %int-2_1879, %int-1_1880 : !torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%int1_1881 = torch.constant.int 1 | |
%2066 = torch.aten.size.int %2048, %int1_1881 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int14336_1882 = torch.constant.int 14336 | |
%2067 = torch.prim.ListConstruct %2066, %int14336_1882 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2068 = torch.aten.view %2064, %2067 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2068, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%2069 = torch.aten.mm %2068, %2065 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.vtensor<[14336,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2069, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_1883 = torch.constant.int 1 | |
%int4096_1884 = torch.constant.int 4096 | |
%2070 = torch.prim.ListConstruct %int1_1883, %2066, %int4096_1884 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2071 = torch.aten.view %2069, %2070 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2071, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_1885 = torch.constant.int 15 | |
%2072 = torch.prims.convert_element_type %2071, %int15_1885 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2072, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_1886 = torch.constant.int 1 | |
%2073 = torch.aten.add.Tensor %2030, %2072, %int1_1886 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2073, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
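// The next transformer block appears to start here: attention RMSNorm (weight %86), followed by the Q projection (%88, 4096x4096) and the K projection (%90, 1024x4096, i.e. 8 KV heads x 128).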
%int6_1887 = torch.constant.int 6 | |
%2074 = torch.prims.convert_element_type %2073, %int6_1887 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2074, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_1888 = torch.constant.int 2 | |
%2075 = torch.aten.pow.Tensor_Scalar %2074, %int2_1888 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2075, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_1889 = torch.constant.int -1 | |
%2076 = torch.prim.ListConstruct %int-1_1889 : (!torch.int) -> !torch.list<int> | |
%true_1890 = torch.constant.bool true | |
%none_1891 = torch.constant.none | |
%2077 = torch.aten.mean.dim %2075, %2076, %true_1890, %none_1891 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2077, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_1892 = torch.constant.float 1.000000e-05 | |
%int1_1893 = torch.constant.int 1 | |
%2078 = torch.aten.add.Scalar %2077, %float1.000000e-05_1892, %int1_1893 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2078, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2079 = torch.aten.rsqrt %2078 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2079, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2080 = torch.aten.mul.Tensor %2074, %2079 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2080, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_1894 = torch.constant.int 15 | |
%2081 = torch.prims.convert_element_type %2080, %int15_1894 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2081, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2082 = torch.aten.mul.Tensor %86, %2081 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2082, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2083 = torch.aten.div.Tensor %2082, %87 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2083, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1895 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1896 = torch.constant.float 2.400000e+02 | |
%2084 = torch.aten.clamp %2083, %float-2.400000e02_1895, %float2.400000e02_1896 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2084, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1897 = torch.constant.int 26 | |
%2085 = torch.prims.convert_element_type %2084, %int26_1897 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2085, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1898 = torch.constant.int -2 | |
%int-1_1899 = torch.constant.int -1 | |
%2086 = torch.aten.transpose.int %88, %int-2_1898, %int-1_1899 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_1900 = torch.constant.int 4096 | |
%2087 = torch.prim.ListConstruct %564, %int4096_1900 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2088 = torch.aten.view %2085, %2087 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2088, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2089 = torch.aten.mm %2088, %2086 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2089, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_1901 = torch.constant.int 1 | |
%int4096_1902 = torch.constant.int 4096 | |
%2090 = torch.prim.ListConstruct %int1_1901, %564, %int4096_1902 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2091 = torch.aten.view %2089, %2090 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2091, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_1903 = torch.constant.int 15 | |
%2092 = torch.prims.convert_element_type %2091, %int15_1903 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2092, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
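    // Annotation: K projection — same fp8 quantize/matmul pattern, using input scale %89 and the 1024x4096 weight %90, yielding a [1,?,1024] bf16 result.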
%2093 = torch.aten.div.Tensor %2082, %89 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2093, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1904 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1905 = torch.constant.float 2.400000e+02 | |
%2094 = torch.aten.clamp %2093, %float-2.400000e02_1904, %float2.400000e02_1905 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2094, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1906 = torch.constant.int 26 | |
%2095 = torch.prims.convert_element_type %2094, %int26_1906 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2095, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1907 = torch.constant.int -2 | |
%int-1_1908 = torch.constant.int -1 | |
%2096 = torch.aten.transpose.int %90, %int-2_1907, %int-1_1908 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_1909 = torch.constant.int 4096 | |
%2097 = torch.prim.ListConstruct %564, %int4096_1909 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2098 = torch.aten.view %2095, %2097 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2098, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2099 = torch.aten.mm %2098, %2096 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2099, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_1910 = torch.constant.int 1 | |
%int1024_1911 = torch.constant.int 1024 | |
%2100 = torch.prim.ListConstruct %int1_1910, %564, %int1024_1911 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2101 = torch.aten.view %2099, %2100 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2101, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_1912 = torch.constant.int 15 | |
%2102 = torch.prims.convert_element_type %2101, %int15_1912 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %2102, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
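    // Annotation: V projection — fp8 quantize with input scale %91 and matmul with the 1024x4096 weight %92.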
%2103 = torch.aten.div.Tensor %2082, %91 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2103, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_1913 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_1914 = torch.constant.float 2.400000e+02 | |
%2104 = torch.aten.clamp %2103, %float-2.400000e02_1913, %float2.400000e02_1914 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2104, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_1915 = torch.constant.int 26 | |
%2105 = torch.prims.convert_element_type %2104, %int26_1915 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2105, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_1916 = torch.constant.int -2 | |
%int-1_1917 = torch.constant.int -1 | |
%2106 = torch.aten.transpose.int %92, %int-2_1916, %int-1_1917 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_1918 = torch.constant.int 4096 | |
%2107 = torch.prim.ListConstruct %564, %int4096_1918 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2108 = torch.aten.view %2105, %2107 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2108, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2109 = torch.aten.mm %2108, %2106 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2109, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_1919 = torch.constant.int 1 | |
%int1024_1920 = torch.constant.int 1024 | |
%2110 = torch.prim.ListConstruct %int1_1919, %564, %int1024_1920 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2111 = torch.aten.view %2109, %2110 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2111, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_1921 = torch.constant.int 15 | |
%2112 = torch.prims.convert_element_type %2111, %int15_1921 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %2112, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
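    // Annotation: reshape the projections into heads: Q -> [1,?,32,128], K and V -> [1,?,8,128] (32 query heads, 8 KV heads).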
%int1_1922 = torch.constant.int 1 | |
%int32_1923 = torch.constant.int 32 | |
%int128_1924 = torch.constant.int 128 | |
%2113 = torch.prim.ListConstruct %int1_1922, %564, %int32_1923, %int128_1924 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2114 = torch.aten.view %2092, %2113 : !torch.vtensor<[1,?,4096],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2114, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_1925 = torch.constant.int 1 | |
%int8_1926 = torch.constant.int 8 | |
%int128_1927 = torch.constant.int 128 | |
%2115 = torch.prim.ListConstruct %int1_1925, %564, %int8_1926, %int128_1927 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2116 = torch.aten.view %2102, %2115 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2116, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int1_1928 = torch.constant.int 1 | |
%int8_1929 = torch.constant.int 8 | |
%int128_1930 = torch.constant.int 128 | |
%2117 = torch.prim.ListConstruct %int1_1928, %564, %int8_1929, %int128_1930 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2118 = torch.aten.view %2112, %2117 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2118, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
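    // Annotation: build the rotary-embedding tables: positions 0..131071, inverse frequencies from base 5.0e5 with Llama-3-style low/high-frequency scaling (factor 8, original context 8192), then cos/sin tables converted to bf16.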
%int131072_1931 = torch.constant.int 131072 | |
%none_1932 = torch.constant.none | |
%none_1933 = torch.constant.none | |
%cpu_1934 = torch.constant.device "cpu" | |
%false_1935 = torch.constant.bool false | |
%2119 = torch.aten.arange %int131072_1931, %none_1932, %none_1933, %cpu_1934, %false_1935 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_1936 = torch.constant.int 0 | |
%int128_1937 = torch.constant.int 128 | |
%int2_1938 = torch.constant.int 2 | |
%int4_1939 = torch.constant.int 4 | |
%none_1940 = torch.constant.none | |
%cpu_1941 = torch.constant.device "cpu" | |
%false_1942 = torch.constant.bool false | |
%2120 = torch.aten.arange.start_step %int0_1936, %int128_1937, %int2_1938, %int4_1939, %none_1940, %cpu_1941, %false_1942 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_1943 = torch.constant.int 6 | |
%2121 = torch.prims.convert_element_type %2120, %int6_1943 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_1944 = torch.constant.int 128 | |
%2122 = torch.aten.div.Scalar %2121, %int128_1944 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_1945 = torch.constant.float 5.000000e+05 | |
%2123 = torch.aten.pow.Scalar %float5.000000e05_1945, %2122 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2124 = torch.aten.reciprocal %2123 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_1946 = torch.constant.float 1.000000e+00 | |
%2125 = torch.aten.mul.Scalar %2124, %float1.000000e00_1946 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%2126 = torch.aten.reciprocal %2125 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_1947 = torch.constant.float 6.2831853071795862 | |
%2127 = torch.aten.mul.Scalar %2126, %float6.283190e00_1947 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_1948 = torch.constant.float 8.192000e+03 | |
%2128 = torch.aten.gt.Scalar %2127, %float8.192000e03_1948 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_1949 = torch.constant.int 8 | |
%2129 = torch.aten.div.Scalar %2125, %int8_1949 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2130 = torch.aten.where.self %2128, %2129, %2125 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2131 = torch.aten.reciprocal %2127 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_1950 = torch.constant.int 8192 | |
%2132 = torch.aten.mul.Scalar %2131, %int8192_1950 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_1951 = torch.constant.int 1 | |
%int1_1952 = torch.constant.int 1 | |
%2133 = torch.aten.sub.Scalar %2132, %int1_1951, %int1_1952 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_1953 = torch.constant.int 3 | |
%2134 = torch.aten.div.Scalar %2133, %int3_1953 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_1954 = torch.constant.int 1 | |
%int1_1955 = torch.constant.int 1 | |
%2135 = torch.aten.rsub.Scalar %2134, %int1_1954, %int1_1955 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%2136 = torch.aten.mul.Tensor %2135, %2130 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_1956 = torch.constant.int 8 | |
%2137 = torch.aten.div.Scalar %2136, %int8_1956 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2138 = torch.aten.mul.Tensor %2134, %2130 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_1957 = torch.constant.int 1 | |
%2139 = torch.aten.add.Tensor %2137, %2138, %int1_1957 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_1958 = torch.constant.float 2.048000e+03 | |
%2140 = torch.aten.lt.Scalar %2127, %float2.048000e03_1958 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2141 = torch.aten.bitwise_not %2140 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_1959 = torch.constant.float 8.192000e+03 | |
%2142 = torch.aten.gt.Scalar %2127, %float8.192000e03_1959 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2143 = torch.aten.bitwise_not %2142 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2144 = torch.aten.mul.Tensor %2141, %2143 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2145 = torch.aten.where.self %2144, %2139, %2130 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2146 = torch.prim.ListConstruct %2145, %2145 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_1960 = torch.constant.int -1 | |
%2147 = torch.aten.cat %2146, %int-1_1960 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_1961 = torch.constant.int 6 | |
%2148 = torch.prims.convert_element_type %2119, %int6_1961 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_1962 = torch.constant.int 131072 | |
%int1_1963 = torch.constant.int 1 | |
%2149 = torch.prim.ListConstruct %int131072_1962, %int1_1963 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2150 = torch.aten.view %2148, %2149 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%2151 = torch.aten.mul.Tensor %2150, %2147 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%2152 = torch.aten.cos %2151 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_1964 = torch.constant.int 15 | |
%2153 = torch.prims.convert_element_type %2152, %int15_1964 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%2154 = torch.aten.sin %2151 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_1965 = torch.constant.int 15 | |
%2155 = torch.prims.convert_element_type %2154, %int15_1965 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
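    // Annotation: apply RoPE to the query heads — slice cos/sin to the sequence length, broadcast to [1,1,?,128], and combine q*cos + rotate_half(q)*sin.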
%int1_1966 = torch.constant.int 1 | |
%2156 = torch.aten.size.int %2091, %int1_1966 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_1967 = torch.constant.int 0 | |
%2157 = torch.aten.add.int %int0_1967, %2156 : !torch.int, !torch.int -> !torch.int | |
%int0_1968 = torch.constant.int 0 | |
%int0_1969 = torch.constant.int 0 | |
%int1_1970 = torch.constant.int 1 | |
%2158 = torch.aten.slice.Tensor %2153, %int0_1968, %int0_1969, %2157, %int1_1970 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2158, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_1971 = torch.constant.int 1 | |
%int0_1972 = torch.constant.int 0 | |
%int9223372036854775807_1973 = torch.constant.int 9223372036854775807 | |
%int1_1974 = torch.constant.int 1 | |
%2159 = torch.aten.slice.Tensor %2158, %int1_1971, %int0_1972, %int9223372036854775807_1973, %int1_1974 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2159, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_1975 = torch.constant.int 0 | |
%2160 = torch.aten.add.int %int0_1975, %2156 : !torch.int, !torch.int -> !torch.int | |
%int0_1976 = torch.constant.int 0 | |
%int0_1977 = torch.constant.int 0 | |
%int1_1978 = torch.constant.int 1 | |
%2161 = torch.aten.slice.Tensor %2155, %int0_1976, %int0_1977, %2160, %int1_1978 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2161, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_1979 = torch.constant.int 1 | |
%int0_1980 = torch.constant.int 0 | |
%int9223372036854775807_1981 = torch.constant.int 9223372036854775807 | |
%int1_1982 = torch.constant.int 1 | |
%2162 = torch.aten.slice.Tensor %2161, %int1_1979, %int0_1980, %int9223372036854775807_1981, %int1_1982 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2162, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_1983 = torch.constant.int 0 | |
%2163 = torch.aten.unsqueeze %2159, %int0_1983 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2163, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_1984 = torch.constant.int 1 | |
%int0_1985 = torch.constant.int 0 | |
%int9223372036854775807_1986 = torch.constant.int 9223372036854775807 | |
%int1_1987 = torch.constant.int 1 | |
%2164 = torch.aten.slice.Tensor %2163, %int1_1984, %int0_1985, %int9223372036854775807_1986, %int1_1987 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2164, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_1988 = torch.constant.int 2 | |
%2165 = torch.aten.unsqueeze %2164, %int2_1988 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2165, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_1989 = torch.constant.int 3 | |
%int0_1990 = torch.constant.int 0 | |
%int9223372036854775807_1991 = torch.constant.int 9223372036854775807 | |
%int1_1992 = torch.constant.int 1 | |
%2166 = torch.aten.slice.Tensor %2165, %int3_1989, %int0_1990, %int9223372036854775807_1991, %int1_1992 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2166, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_1993 = torch.constant.int 0 | |
%2167 = torch.aten.unsqueeze %2162, %int0_1993 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2167, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_1994 = torch.constant.int 1 | |
%int0_1995 = torch.constant.int 0 | |
%int9223372036854775807_1996 = torch.constant.int 9223372036854775807 | |
%int1_1997 = torch.constant.int 1 | |
%2168 = torch.aten.slice.Tensor %2167, %int1_1994, %int0_1995, %int9223372036854775807_1996, %int1_1997 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2168, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_1998 = torch.constant.int 2 | |
%2169 = torch.aten.unsqueeze %2168, %int2_1998 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2169, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_1999 = torch.constant.int 3 | |
%int0_2000 = torch.constant.int 0 | |
%int9223372036854775807_2001 = torch.constant.int 9223372036854775807 | |
%int1_2002 = torch.constant.int 1 | |
%2170 = torch.aten.slice.Tensor %2169, %int3_1999, %int0_2000, %int9223372036854775807_2001, %int1_2002 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2170, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_2003 = torch.constant.int 1 | |
%int2_2004 = torch.constant.int 2 | |
%2171 = torch.aten.transpose.int %2166, %int1_2003, %int2_2004 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2171, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2005 = torch.constant.int 1 | |
%int1_2006 = torch.constant.int 1 | |
%int1_2007 = torch.constant.int 1 | |
%int1_2008 = torch.constant.int 1 | |
%2172 = torch.prim.ListConstruct %int1_2005, %int1_2006, %int1_2007, %int1_2008 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2173 = torch.aten.repeat %2171, %2172 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2173, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2009 = torch.constant.int 1 | |
%int2_2010 = torch.constant.int 2 | |
%2174 = torch.aten.transpose.int %2170, %int1_2009, %int2_2010 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2174, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2011 = torch.constant.int 1 | |
%int2_2012 = torch.constant.int 2 | |
%2175 = torch.aten.transpose.int %2114, %int1_2011, %int2_2012 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2175, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2013 = torch.constant.int 1 | |
%int1_2014 = torch.constant.int 1 | |
%int1_2015 = torch.constant.int 1 | |
%int1_2016 = torch.constant.int 1 | |
%2176 = torch.prim.ListConstruct %int1_2013, %int1_2014, %int1_2015, %int1_2016 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2177 = torch.aten.repeat %2174, %2176 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2177, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%2178 = torch.aten.mul.Tensor %2175, %2173 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2178, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int3_2017 = torch.constant.int 3 | |
%int0_2018 = torch.constant.int 0 | |
%int64_2019 = torch.constant.int 64 | |
%int1_2020 = torch.constant.int 1 | |
%2179 = torch.aten.slice.Tensor %2175, %int3_2017, %int0_2018, %int64_2019, %int1_2020 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %2179, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%int3_2021 = torch.constant.int 3 | |
%int64_2022 = torch.constant.int 64 | |
%int9223372036854775807_2023 = torch.constant.int 9223372036854775807 | |
%int1_2024 = torch.constant.int 1 | |
%2180 = torch.aten.slice.Tensor %2175, %int3_2021, %int64_2022, %int9223372036854775807_2023, %int1_2024 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %2180, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%2181 = torch.aten.neg %2180 : !torch.vtensor<[1,32,?,64],bf16> -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %2181, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%2182 = torch.prim.ListConstruct %2181, %2179 : (!torch.vtensor<[1,32,?,64],bf16>, !torch.vtensor<[1,32,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_2025 = torch.constant.int -1 | |
%2183 = torch.aten.cat %2182, %int-1_2025 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2183, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%2184 = torch.aten.mul.Tensor %2183, %2177 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2184, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2026 = torch.constant.int 1 | |
%2185 = torch.aten.add.Tensor %2178, %2184, %int1_2026 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2185, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2027 = torch.constant.int 1 | |
%int2_2028 = torch.constant.int 2 | |
%2186 = torch.aten.transpose.int %2185, %int1_2027, %int2_2028 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2186, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
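    // Annotation: the same rotary cos/sin tables are recomputed below, this time for the key heads.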
%int131072_2029 = torch.constant.int 131072 | |
%none_2030 = torch.constant.none | |
%none_2031 = torch.constant.none | |
%cpu_2032 = torch.constant.device "cpu" | |
%false_2033 = torch.constant.bool false | |
%2187 = torch.aten.arange %int131072_2029, %none_2030, %none_2031, %cpu_2032, %false_2033 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_2034 = torch.constant.int 0 | |
%int128_2035 = torch.constant.int 128 | |
%int2_2036 = torch.constant.int 2 | |
%int4_2037 = torch.constant.int 4 | |
%none_2038 = torch.constant.none | |
%cpu_2039 = torch.constant.device "cpu" | |
%false_2040 = torch.constant.bool false | |
%2188 = torch.aten.arange.start_step %int0_2034, %int128_2035, %int2_2036, %int4_2037, %none_2038, %cpu_2039, %false_2040 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_2041 = torch.constant.int 6 | |
%2189 = torch.prims.convert_element_type %2188, %int6_2041 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_2042 = torch.constant.int 128 | |
%2190 = torch.aten.div.Scalar %2189, %int128_2042 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_2043 = torch.constant.float 5.000000e+05 | |
%2191 = torch.aten.pow.Scalar %float5.000000e05_2043, %2190 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2192 = torch.aten.reciprocal %2191 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_2044 = torch.constant.float 1.000000e+00 | |
%2193 = torch.aten.mul.Scalar %2192, %float1.000000e00_2044 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%2194 = torch.aten.reciprocal %2193 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_2045 = torch.constant.float 6.2831853071795862 | |
%2195 = torch.aten.mul.Scalar %2194, %float6.283190e00_2045 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_2046 = torch.constant.float 8.192000e+03 | |
%2196 = torch.aten.gt.Scalar %2195, %float8.192000e03_2046 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_2047 = torch.constant.int 8 | |
%2197 = torch.aten.div.Scalar %2193, %int8_2047 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2198 = torch.aten.where.self %2196, %2197, %2193 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2199 = torch.aten.reciprocal %2195 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_2048 = torch.constant.int 8192 | |
%2200 = torch.aten.mul.Scalar %2199, %int8192_2048 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_2049 = torch.constant.int 1 | |
%int1_2050 = torch.constant.int 1 | |
%2201 = torch.aten.sub.Scalar %2200, %int1_2049, %int1_2050 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_2051 = torch.constant.int 3 | |
%2202 = torch.aten.div.Scalar %2201, %int3_2051 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_2052 = torch.constant.int 1 | |
%int1_2053 = torch.constant.int 1 | |
%2203 = torch.aten.rsub.Scalar %2202, %int1_2052, %int1_2053 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%2204 = torch.aten.mul.Tensor %2203, %2198 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_2054 = torch.constant.int 8 | |
%2205 = torch.aten.div.Scalar %2204, %int8_2054 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2206 = torch.aten.mul.Tensor %2202, %2198 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_2055 = torch.constant.int 1 | |
%2207 = torch.aten.add.Tensor %2205, %2206, %int1_2055 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_2056 = torch.constant.float 2.048000e+03 | |
%2208 = torch.aten.lt.Scalar %2195, %float2.048000e03_2056 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2209 = torch.aten.bitwise_not %2208 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_2057 = torch.constant.float 8.192000e+03 | |
%2210 = torch.aten.gt.Scalar %2195, %float8.192000e03_2057 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2211 = torch.aten.bitwise_not %2210 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2212 = torch.aten.mul.Tensor %2209, %2211 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2213 = torch.aten.where.self %2212, %2207, %2198 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2214 = torch.prim.ListConstruct %2213, %2213 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_2058 = torch.constant.int -1 | |
%2215 = torch.aten.cat %2214, %int-1_2058 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_2059 = torch.constant.int 6 | |
%2216 = torch.prims.convert_element_type %2187, %int6_2059 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_2060 = torch.constant.int 131072 | |
%int1_2061 = torch.constant.int 1 | |
%2217 = torch.prim.ListConstruct %int131072_2060, %int1_2061 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2218 = torch.aten.view %2216, %2217 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%2219 = torch.aten.mul.Tensor %2218, %2215 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%2220 = torch.aten.cos %2219 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_2062 = torch.constant.int 15 | |
%2221 = torch.prims.convert_element_type %2220, %int15_2062 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%2222 = torch.aten.sin %2219 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_2063 = torch.constant.int 15 | |
%2223 = torch.prims.convert_element_type %2222, %int15_2063 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
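    // Annotation: apply RoPE to the key heads using the freshly sliced cos/sin tables.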
%int1_2064 = torch.constant.int 1 | |
%2224 = torch.aten.size.int %2101, %int1_2064 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_2065 = torch.constant.int 0 | |
%2225 = torch.aten.add.int %int0_2065, %2224 : !torch.int, !torch.int -> !torch.int | |
%int0_2066 = torch.constant.int 0 | |
%int0_2067 = torch.constant.int 0 | |
%int1_2068 = torch.constant.int 1 | |
%2226 = torch.aten.slice.Tensor %2221, %int0_2066, %int0_2067, %2225, %int1_2068 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2226, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_2069 = torch.constant.int 1 | |
%int0_2070 = torch.constant.int 0 | |
%int9223372036854775807_2071 = torch.constant.int 9223372036854775807 | |
%int1_2072 = torch.constant.int 1 | |
%2227 = torch.aten.slice.Tensor %2226, %int1_2069, %int0_2070, %int9223372036854775807_2071, %int1_2072 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2227, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_2073 = torch.constant.int 0 | |
%2228 = torch.aten.add.int %int0_2073, %2224 : !torch.int, !torch.int -> !torch.int | |
%int0_2074 = torch.constant.int 0 | |
%int0_2075 = torch.constant.int 0 | |
%int1_2076 = torch.constant.int 1 | |
%2229 = torch.aten.slice.Tensor %2223, %int0_2074, %int0_2075, %2228, %int1_2076 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2229, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_2077 = torch.constant.int 1 | |
%int0_2078 = torch.constant.int 0 | |
%int9223372036854775807_2079 = torch.constant.int 9223372036854775807 | |
%int1_2080 = torch.constant.int 1 | |
%2230 = torch.aten.slice.Tensor %2229, %int1_2077, %int0_2078, %int9223372036854775807_2079, %int1_2080 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2230, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_2081 = torch.constant.int 0 | |
%2231 = torch.aten.unsqueeze %2227, %int0_2081 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2231, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_2082 = torch.constant.int 1 | |
%int0_2083 = torch.constant.int 0 | |
%int9223372036854775807_2084 = torch.constant.int 9223372036854775807 | |
%int1_2085 = torch.constant.int 1 | |
%2232 = torch.aten.slice.Tensor %2231, %int1_2082, %int0_2083, %int9223372036854775807_2084, %int1_2085 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2232, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_2086 = torch.constant.int 2 | |
%2233 = torch.aten.unsqueeze %2232, %int2_2086 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2233, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_2087 = torch.constant.int 3 | |
%int0_2088 = torch.constant.int 0 | |
%int9223372036854775807_2089 = torch.constant.int 9223372036854775807 | |
%int1_2090 = torch.constant.int 1 | |
%2234 = torch.aten.slice.Tensor %2233, %int3_2087, %int0_2088, %int9223372036854775807_2089, %int1_2090 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2234, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_2091 = torch.constant.int 0 | |
%2235 = torch.aten.unsqueeze %2230, %int0_2091 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2235, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_2092 = torch.constant.int 1 | |
%int0_2093 = torch.constant.int 0 | |
%int9223372036854775807_2094 = torch.constant.int 9223372036854775807 | |
%int1_2095 = torch.constant.int 1 | |
%2236 = torch.aten.slice.Tensor %2235, %int1_2092, %int0_2093, %int9223372036854775807_2094, %int1_2095 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2236, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_2096 = torch.constant.int 2 | |
%2237 = torch.aten.unsqueeze %2236, %int2_2096 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2237, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_2097 = torch.constant.int 3 | |
%int0_2098 = torch.constant.int 0 | |
%int9223372036854775807_2099 = torch.constant.int 9223372036854775807 | |
%int1_2100 = torch.constant.int 1 | |
%2238 = torch.aten.slice.Tensor %2237, %int3_2097, %int0_2098, %int9223372036854775807_2099, %int1_2100 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2238, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_2101 = torch.constant.int 1 | |
%int2_2102 = torch.constant.int 2 | |
%2239 = torch.aten.transpose.int %2234, %int1_2101, %int2_2102 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2239, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2103 = torch.constant.int 1 | |
%int1_2104 = torch.constant.int 1 | |
%int1_2105 = torch.constant.int 1 | |
%int1_2106 = torch.constant.int 1 | |
%2240 = torch.prim.ListConstruct %int1_2103, %int1_2104, %int1_2105, %int1_2106 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2241 = torch.aten.repeat %2239, %2240 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2241, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2107 = torch.constant.int 1 | |
%int2_2108 = torch.constant.int 2 | |
%2242 = torch.aten.transpose.int %2238, %int1_2107, %int2_2108 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2242, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2109 = torch.constant.int 1 | |
%int2_2110 = torch.constant.int 2 | |
%2243 = torch.aten.transpose.int %2116, %int1_2109, %int2_2110 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2243, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_2111 = torch.constant.int 1 | |
%int1_2112 = torch.constant.int 1 | |
%int1_2113 = torch.constant.int 1 | |
%int1_2114 = torch.constant.int 1 | |
%2244 = torch.prim.ListConstruct %int1_2111, %int1_2112, %int1_2113, %int1_2114 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2245 = torch.aten.repeat %2242, %2244 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2245, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%2246 = torch.aten.mul.Tensor %2243, %2241 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2246, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int3_2115 = torch.constant.int 3 | |
%int0_2116 = torch.constant.int 0 | |
%int64_2117 = torch.constant.int 64 | |
%int1_2118 = torch.constant.int 1 | |
%2247 = torch.aten.slice.Tensor %2243, %int3_2115, %int0_2116, %int64_2117, %int1_2118 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %2247, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%int3_2119 = torch.constant.int 3 | |
%int64_2120 = torch.constant.int 64 | |
%int9223372036854775807_2121 = torch.constant.int 9223372036854775807 | |
%int1_2122 = torch.constant.int 1 | |
%2248 = torch.aten.slice.Tensor %2243, %int3_2119, %int64_2120, %int9223372036854775807_2121, %int1_2122 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %2248, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%2249 = torch.aten.neg %2248 : !torch.vtensor<[1,8,?,64],bf16> -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %2249, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%2250 = torch.prim.ListConstruct %2249, %2247 : (!torch.vtensor<[1,8,?,64],bf16>, !torch.vtensor<[1,8,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_2123 = torch.constant.int -1 | |
%2251 = torch.aten.cat %2250, %int-1_2123 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2251, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%2252 = torch.aten.mul.Tensor %2251, %2245 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2252, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_2124 = torch.constant.int 1 | |
%2253 = torch.aten.add.Tensor %2246, %2252, %int1_2124 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,8,?,128],bf16>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2253, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_2125 = torch.constant.int 1 | |
%int2_2126 = torch.constant.int 2 | |
%2254 = torch.aten.transpose.int %2253, %int1_2125, %int2_2126 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2254, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
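    // Annotation: quantize the rotated keys (%2254) and the value heads (%2118) to f8E4M3FNUZ with the KV-cache scale %93 before writing them into the cache.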
%2255 = torch.aten.div.Tensor %2254, %93 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2255, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_2127 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2128 = torch.constant.float 2.400000e+02 | |
%2256 = torch.aten.clamp %2255, %float-2.400000e02_2127, %float2.400000e02_2128 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2256, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_2129 = torch.constant.int 26 | |
%2257 = torch.prims.convert_element_type %2256, %int26_2129 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2257, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
%2258 = torch.aten.div.Tensor %2118, %93 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2258, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_2130 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2131 = torch.constant.float 2.400000e+02 | |
%2259 = torch.aten.clamp %2258, %float-2.400000e02_2130, %float2.400000e02_2131 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2259, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_2132 = torch.constant.int 26 | |
%2260 = torch.prims.convert_element_type %2259, %int26_2132 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2260, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
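    // Annotation: compute cache write positions from the page indices in %arg2, scaled by 64 (= 32 blocks x 2 K/V slots per page) and offset by 10, which appears to select this block's K sub-slot; the V sub-slot uses offset +1 further below.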
%int64_2133 = torch.constant.int 64 | |
%2261 = torch.aten.mul.Scalar %arg2, %int64_2133 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %2261, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int10 = torch.constant.int 10 | |
%int1_2134 = torch.constant.int 1 | |
%2262 = torch.aten.add.Scalar %2261, %int10, %int1_2134 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %2262, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
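    // Annotation: scatter the quantized K states into the flat paged cache %1993 — view it as [?,32,2,32,8,128], flatten to [?,32,8,128] rows, index_put at the computed positions, then view back to [?,2097152].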
%int1_2135 = torch.constant.int 1 | |
%int32_2136 = torch.constant.int 32 | |
%int8_2137 = torch.constant.int 8 | |
%int128_2138 = torch.constant.int 128 | |
%2263 = torch.prim.ListConstruct %int1_2135, %748, %int32_2136, %int8_2137, %int128_2138 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2264 = torch.aten.view %2257, %2263 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2264, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_2139 = torch.constant.int 32 | |
%int8_2140 = torch.constant.int 8 | |
%int128_2141 = torch.constant.int 128 | |
%2265 = torch.prim.ListConstruct %748, %int32_2139, %int8_2140, %int128_2141 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2266 = torch.aten.view %2264, %2265 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2266, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%2267 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%2268 = torch.aten.view %2262, %2267 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %2268, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%int32_2142 = torch.constant.int 32 | |
%int2_2143 = torch.constant.int 2 | |
%int32_2144 = torch.constant.int 32 | |
%int8_2145 = torch.constant.int 8 | |
%int128_2146 = torch.constant.int 128 | |
%2269 = torch.prim.ListConstruct %739, %int32_2142, %int2_2143, %int32_2144, %int8_2145, %int128_2146 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2270 = torch.aten.view %1993, %2269 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2270, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_2147 = torch.constant.int 32 | |
%2271 = torch.aten.mul.int %739, %int32_2147 : !torch.int, !torch.int -> !torch.int | |
%int2_2148 = torch.constant.int 2 | |
%2272 = torch.aten.mul.int %2271, %int2_2148 : !torch.int, !torch.int -> !torch.int | |
%int32_2149 = torch.constant.int 32 | |
%int8_2150 = torch.constant.int 8 | |
%int128_2151 = torch.constant.int 128 | |
%2273 = torch.prim.ListConstruct %2272, %int32_2149, %int8_2150, %int128_2151 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2274 = torch.aten.view %2270, %2273 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2274, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%2275 = torch.prim.ListConstruct %2268 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_2152 = torch.constant.bool false | |
%2276 = torch.aten.index_put %2274, %2275, %2266, %false_2152 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2276, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_2153 = torch.constant.int 32 | |
%int2_2154 = torch.constant.int 2 | |
%int32_2155 = torch.constant.int 32 | |
%int8_2156 = torch.constant.int 8 | |
%int128_2157 = torch.constant.int 128 | |
%2277 = torch.prim.ListConstruct %739, %int32_2153, %int2_2154, %int32_2155, %int8_2156, %int128_2157 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2278 = torch.aten.view %2276, %2277 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2278, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_2158 = torch.constant.int 2097152 | |
%2279 = torch.prim.ListConstruct %739, %int2097152_2158 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2280 = torch.aten.view %2278, %2279 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2280, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
%int32_2159 = torch.constant.int 32 | |
%int2_2160 = torch.constant.int 2 | |
%int32_2161 = torch.constant.int 32 | |
%int8_2162 = torch.constant.int 8 | |
%int128_2163 = torch.constant.int 128 | |
%2281 = torch.prim.ListConstruct %739, %int32_2159, %int2_2160, %int32_2161, %int8_2162, %int128_2163 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2282 = torch.aten.view %2280, %2281 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2282, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_2164 = torch.constant.int 32 | |
%int8_2165 = torch.constant.int 8 | |
%int128_2166 = torch.constant.int 128 | |
%2283 = torch.prim.ListConstruct %2272, %int32_2164, %int8_2165, %int128_2166 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2284 = torch.aten.view %2282, %2283 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2284, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_2167 = torch.constant.int 1 | |
%int32_2168 = torch.constant.int 32 | |
%int8_2169 = torch.constant.int 8 | |
%int128_2170 = torch.constant.int 128 | |
%2285 = torch.prim.ListConstruct %int1_2167, %748, %int32_2168, %int8_2169, %int128_2170 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2286 = torch.aten.view %2260, %2285 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2286, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_2171 = torch.constant.int 32 | |
%int8_2172 = torch.constant.int 8 | |
%int128_2173 = torch.constant.int 128 | |
%2287 = torch.prim.ListConstruct %748, %int32_2171, %int8_2172, %int128_2173 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2288 = torch.aten.view %2286, %2287 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2288, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
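    // Annotation: write the quantized V states at the adjacent cache slot (indices %2262 + 1) using the same view / index_put / view-back sequence.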
%int1_2174 = torch.constant.int 1 | |
%int1_2175 = torch.constant.int 1 | |
%2289 = torch.aten.add.Scalar %2262, %int1_2174, %int1_2175 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %2289, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%2290 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%2291 = torch.aten.view %2289, %2290 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %2291, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%2292 = torch.prim.ListConstruct %2291 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_2176 = torch.constant.bool false | |
%2293 = torch.aten.index_put %2284, %2292, %2288, %false_2176 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2293, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_2177 = torch.constant.int 32 | |
%int2_2178 = torch.constant.int 2 | |
%int32_2179 = torch.constant.int 32 | |
%int8_2180 = torch.constant.int 8 | |
%int128_2181 = torch.constant.int 128 | |
%2294 = torch.prim.ListConstruct %739, %int32_2177, %int2_2178, %int32_2179, %int8_2180, %int128_2181 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2295 = torch.aten.view %2293, %2294 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2295, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_2182 = torch.constant.int 2097152 | |
%2296 = torch.prim.ListConstruct %739, %int2097152_2182 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2297 = torch.aten.view %2295, %2296 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2297, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
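    // Annotation: grouped-query attention — expand K from 8 KV heads to 32 heads by unsqueezing and broadcasting each KV head across 4 query heads, then flattening to [1,?,32,128].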
%int-2_2183 = torch.constant.int -2 | |
%2298 = torch.aten.unsqueeze %2257, %int-2_2183 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2298, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_2184 = torch.constant.int 1 | |
%int8_2185 = torch.constant.int 8 | |
%int4_2186 = torch.constant.int 4 | |
%int128_2187 = torch.constant.int 128 | |
%2299 = torch.prim.ListConstruct %int1_2184, %2224, %int8_2185, %int4_2186, %int128_2187 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_2188 = torch.constant.bool false | |
%2300 = torch.aten.expand %2298, %2299, %false_2188 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2300, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_2189 = torch.constant.int 0 | |
%2301 = torch.aten.clone %2300, %int0_2189 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2301, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_2190 = torch.constant.int 1 | |
%int32_2191 = torch.constant.int 32 | |
%int128_2192 = torch.constant.int 128 | |
%2302 = torch.prim.ListConstruct %int1_2190, %2224, %int32_2191, %int128_2192 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2303 = torch.aten._unsafe_view %2301, %2302 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2303, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
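    // Annotation: the same 8 -> 32 head expansion is applied to V below.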
%int-2_2193 = torch.constant.int -2 | |
%2304 = torch.aten.unsqueeze %2260, %int-2_2193 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2304, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_2194 = torch.constant.int 1 | |
%2305 = torch.aten.size.int %2111, %int1_2194 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int1_2195 = torch.constant.int 1 | |
%int8_2196 = torch.constant.int 8 | |
%int4_2197 = torch.constant.int 4 | |
%int128_2198 = torch.constant.int 128 | |
%2306 = torch.prim.ListConstruct %int1_2195, %2305, %int8_2196, %int4_2197, %int128_2198 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_2199 = torch.constant.bool false | |
%2307 = torch.aten.expand %2304, %2306, %false_2199 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2307, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_2200 = torch.constant.int 0 | |
%2308 = torch.aten.clone %2307, %int0_2200 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2308, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_2201 = torch.constant.int 1 | |
%int32_2202 = torch.constant.int 32 | |
%int128_2203 = torch.constant.int 128 | |
%2309 = torch.prim.ListConstruct %int1_2201, %2305, %int32_2202, %int128_2203 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2310 = torch.aten._unsafe_view %2308, %2309 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2310, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
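    // Annotation: grouped-query-attention expansion — each of the 8 K/V heads is repeated 4x (unsqueeze -> expand -> clone -> _unsafe_view) to match the 32 query heads, giving [1, seq, 32, 128] for both K and V.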
%int6_2204 = torch.constant.int 6 | |
%2311 = torch.prims.convert_element_type %2303, %int6_2204 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2311, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%2312 = torch.aten.mul.Tensor %2311, %93 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2312, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_2205 = torch.constant.int 15 | |
%2313 = torch.prims.convert_element_type %2312, %int15_2205 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2313, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int6_2206 = torch.constant.int 6 | |
%2314 = torch.prims.convert_element_type %2310, %int6_2206 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2314, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%2315 = torch.aten.mul.Tensor %2314, %93 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2315, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_2207 = torch.constant.int 15 | |
%2316 = torch.prims.convert_element_type %2315, %int15_2207 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2316, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
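    // Annotation: dequantize the expanded K and V from f8E4M3FNUZ to bf16 — cast to f32, multiply by the per-tensor scale %93 (presumably the KV-cache dequantization scale), cast to bf16.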
%int1_2208 = torch.constant.int 1 | |
%int2_2209 = torch.constant.int 2 | |
%2317 = torch.aten.transpose.int %2186, %int1_2208, %int2_2209 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2317, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2210 = torch.constant.int 1 | |
%int2_2211 = torch.constant.int 2 | |
%2318 = torch.aten.transpose.int %2313, %int1_2210, %int2_2211 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2318, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2212 = torch.constant.int 1 | |
%int2_2213 = torch.constant.int 2 | |
%2319 = torch.aten.transpose.int %2316, %int1_2212, %int2_2213 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2319, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%float0.000000e00_2214 = torch.constant.float 0.000000e+00 | |
%true_2215 = torch.constant.bool true | |
%none_2216 = torch.constant.none | |
%none_2217 = torch.constant.none | |
%2320:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%2317, %2318, %2319, %float0.000000e00_2214, %true_2215, %none_2216, %none_2217) : (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?],f32>) | |
torch.bind_symbolic_shape %2320#0, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
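    // Annotation: Q, K and V are transposed to [1, 32, seq, 128] and fed to causal flash attention (dropout 0.0, is_causal = true, no explicit mask or scale override).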
%int1_2218 = torch.constant.int 1 | |
%int2_2219 = torch.constant.int 2 | |
%2321 = torch.aten.transpose.int %2320#0, %int1_2218, %int2_2219 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2321, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_2220 = torch.constant.int 1 | |
%int4096_2221 = torch.constant.int 4096 | |
%2322 = torch.prim.ListConstruct %int1_2220, %2156, %int4096_2221 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2323 = torch.aten.view %2321, %2322 : !torch.vtensor<[1,?,32,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2323, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2324 = torch.aten.div.Tensor %2323, %94 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2324, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2222 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2223 = torch.constant.float 2.400000e+02 | |
%2325 = torch.aten.clamp %2324, %float-2.400000e02_2222, %float2.400000e02_2223 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2325, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2224 = torch.constant.int 26 | |
%2326 = torch.prims.convert_element_type %2325, %int26_2224 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2326, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
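    // Annotation: the attention output is folded back to [1, seq, 4096], then re-quantized for the output projection — divide by the input scale %94, clamp to the f8E4M3FNUZ finite range [-240, 240], cast to f8E4M3FNUZ.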
%int-2_2225 = torch.constant.int -2 | |
%int-1_2226 = torch.constant.int -1 | |
%2327 = torch.aten.transpose.int %95, %int-2_2225, %int-1_2226 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_2227 = torch.constant.int 4096 | |
%2328 = torch.prim.ListConstruct %2156, %int4096_2227 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2329 = torch.aten.view %2326, %2328 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2329, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2330 = torch.aten.mm %2329, %2327 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2330, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_2228 = torch.constant.int 1 | |
%int4096_2229 = torch.constant.int 4096 | |
%2331 = torch.prim.ListConstruct %int1_2228, %2156, %int4096_2229 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2332 = torch.aten.view %2330, %2331 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2332, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_2230 = torch.constant.int 15 | |
%2333 = torch.prims.convert_element_type %2332, %int15_2230 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2333, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_2231 = torch.constant.int 1 | |
%2334 = torch.aten.add.Tensor %2073, %2333, %int1_2231 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2334, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
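    // Annotation: attn_output projection as an f8 matmul against the transposed 4096x4096 weight %95, dequantized to bf16 (%2333) and added to the residual stream (%2073).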
%int6_2232 = torch.constant.int 6 | |
%2335 = torch.prims.convert_element_type %2334, %int6_2232 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2335, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_2233 = torch.constant.int 2 | |
%2336 = torch.aten.pow.Tensor_Scalar %2335, %int2_2233 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2336, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_2234 = torch.constant.int -1 | |
%2337 = torch.prim.ListConstruct %int-1_2234 : (!torch.int) -> !torch.list<int> | |
%true_2235 = torch.constant.bool true | |
%none_2236 = torch.constant.none | |
%2338 = torch.aten.mean.dim %2336, %2337, %true_2235, %none_2236 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2338, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_2237 = torch.constant.float 1.000000e-05 | |
%int1_2238 = torch.constant.int 1 | |
%2339 = torch.aten.add.Scalar %2338, %float1.000000e-05_2237, %int1_2238 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2339, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2340 = torch.aten.rsqrt %2339 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2340, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2341 = torch.aten.mul.Tensor %2335, %2340 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2341, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_2239 = torch.constant.int 15 | |
%2342 = torch.prims.convert_element_type %2341, %int15_2239 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2342, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2343 = torch.aten.mul.Tensor %96, %2342 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2343, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
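    // Annotation: RMSNorm — mean of squares over the hidden dim, add eps 1e-05, rsqrt, scale the activations, then multiply by the ffn_norm weight %96.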
%2344 = torch.aten.div.Tensor %2343, %97 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2344, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2240 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2241 = torch.constant.float 2.400000e+02 | |
%2345 = torch.aten.clamp %2344, %float-2.400000e02_2240, %float2.400000e02_2241 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2345, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2242 = torch.constant.int 26 | |
%2346 = torch.prims.convert_element_type %2345, %int26_2242 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2346, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_2243 = torch.constant.int -2 | |
%int-1_2244 = torch.constant.int -1 | |
%2347 = torch.aten.transpose.int %98, %int-2_2243, %int-1_2244 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_2245 = torch.constant.int 4096 | |
%2348 = torch.prim.ListConstruct %564, %int4096_2245 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2349 = torch.aten.view %2346, %2348 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2349, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2350 = torch.aten.mm %2349, %2347 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2350, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_2246 = torch.constant.int 1 | |
%int14336_2247 = torch.constant.int 14336 | |
%2351 = torch.prim.ListConstruct %int1_2246, %564, %int14336_2247 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2352 = torch.aten.view %2350, %2351 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2352, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_2248 = torch.constant.int 15 | |
%2353 = torch.prims.convert_element_type %2352, %int15_2248 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2353, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%2354 = torch.aten.silu %2353 : !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2354, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
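    // Annotation: ffn_gate branch — quantize the normed activations (scale %97, clamp to +/-240), matmul against the transposed [14336, 4096] gate weight %98, dequantize to bf16, apply SiLU.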
%2355 = torch.aten.div.Tensor %2343, %99 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2355, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2249 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2250 = torch.constant.float 2.400000e+02 | |
%2356 = torch.aten.clamp %2355, %float-2.400000e02_2249, %float2.400000e02_2250 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2356, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2251 = torch.constant.int 26 | |
%2357 = torch.prims.convert_element_type %2356, %int26_2251 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2357, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_2252 = torch.constant.int -2 | |
%int-1_2253 = torch.constant.int -1 | |
%2358 = torch.aten.transpose.int %100, %int-2_2252, %int-1_2253 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_2254 = torch.constant.int 4096 | |
%2359 = torch.prim.ListConstruct %564, %int4096_2254 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2360 = torch.aten.view %2357, %2359 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2360, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2361 = torch.aten.mm %2360, %2358 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2361, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_2255 = torch.constant.int 1 | |
%int14336_2256 = torch.constant.int 14336 | |
%2362 = torch.prim.ListConstruct %int1_2255, %564, %int14336_2256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2363 = torch.aten.view %2361, %2362 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2363, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_2257 = torch.constant.int 15 | |
%2364 = torch.prims.convert_element_type %2363, %int15_2257 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2364, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%2365 = torch.aten.mul.Tensor %2354, %2364 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2365, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
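    // Annotation: ffn_up branch through weight %100, followed by the elementwise product with the SiLU-gated branch — the SwiGLU feed-forward activation.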
%2366 = torch.aten.div.Tensor %2365, %101 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2366, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%float-2.400000e02_2258 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2259 = torch.constant.float 2.400000e+02 | |
%2367 = torch.aten.clamp %2366, %float-2.400000e02_2258, %float2.400000e02_2259 : !torch.vtensor<[1,?,14336],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2367, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%int26_2260 = torch.constant.int 26 | |
%2368 = torch.prims.convert_element_type %2367, %int26_2260 : !torch.vtensor<[1,?,14336],bf16>, !torch.int -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2368, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int-2_2261 = torch.constant.int -2 | |
%int-1_2262 = torch.constant.int -1 | |
%2369 = torch.aten.transpose.int %102, %int-2_2261, %int-1_2262 : !torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%int1_2263 = torch.constant.int 1 | |
%2370 = torch.aten.size.int %2352, %int1_2263 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int14336_2264 = torch.constant.int 14336 | |
%2371 = torch.prim.ListConstruct %2370, %int14336_2264 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2372 = torch.aten.view %2368, %2371 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2372, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%2373 = torch.aten.mm %2372, %2369 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.vtensor<[14336,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2373, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_2265 = torch.constant.int 1 | |
%int4096_2266 = torch.constant.int 4096 | |
%2374 = torch.prim.ListConstruct %int1_2265, %2370, %int4096_2266 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2375 = torch.aten.view %2373, %2374 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2375, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_2267 = torch.constant.int 15 | |
%2376 = torch.prims.convert_element_type %2375, %int15_2267 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2376, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_2268 = torch.constant.int 1 | |
%2377 = torch.aten.add.Tensor %2334, %2376, %int1_2268 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2377, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
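    // Annotation: ffn_down projection through the [4096, 14336] weight %102 back to the model dim, dequantized to bf16 and added to the residual stream (%2334).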
%int6_2269 = torch.constant.int 6 | |
%2378 = torch.prims.convert_element_type %2377, %int6_2269 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2378, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_2270 = torch.constant.int 2 | |
%2379 = torch.aten.pow.Tensor_Scalar %2378, %int2_2270 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2379, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_2271 = torch.constant.int -1 | |
%2380 = torch.prim.ListConstruct %int-1_2271 : (!torch.int) -> !torch.list<int> | |
%true_2272 = torch.constant.bool true | |
%none_2273 = torch.constant.none | |
%2381 = torch.aten.mean.dim %2379, %2380, %true_2272, %none_2273 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2381, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_2274 = torch.constant.float 1.000000e-05 | |
%int1_2275 = torch.constant.int 1 | |
%2382 = torch.aten.add.Scalar %2381, %float1.000000e-05_2274, %int1_2275 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2382, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2383 = torch.aten.rsqrt %2382 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2383, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2384 = torch.aten.mul.Tensor %2378, %2383 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2384, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_2276 = torch.constant.int 15 | |
%2385 = torch.prims.convert_element_type %2384, %int15_2276 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2385, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2386 = torch.aten.mul.Tensor %103, %2385 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2386, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
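    // Annotation: another RMSNorm (eps 1e-05) with weight %103, which appears to be the following decoder block's attn_norm, preparing the input for that block's attention.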
%2387 = torch.aten.div.Tensor %2386, %104 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2387, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2277 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2278 = torch.constant.float 2.400000e+02 | |
%2388 = torch.aten.clamp %2387, %float-2.400000e02_2277, %float2.400000e02_2278 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2388, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2279 = torch.constant.int 26 | |
%2389 = torch.prims.convert_element_type %2388, %int26_2279 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2389, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_2280 = torch.constant.int -2 | |
%int-1_2281 = torch.constant.int -1 | |
%2390 = torch.aten.transpose.int %105, %int-2_2280, %int-1_2281 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_2282 = torch.constant.int 4096 | |
%2391 = torch.prim.ListConstruct %564, %int4096_2282 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2392 = torch.aten.view %2389, %2391 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2392, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2393 = torch.aten.mm %2392, %2390 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2393, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_2283 = torch.constant.int 1 | |
%int4096_2284 = torch.constant.int 4096 | |
%2394 = torch.prim.ListConstruct %int1_2283, %564, %int4096_2284 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2395 = torch.aten.view %2393, %2394 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2395, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_2285 = torch.constant.int 15 | |
%2396 = torch.prims.convert_element_type %2395, %int15_2285 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2396, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2397 = torch.aten.div.Tensor %2386, %106 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2397, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2286 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2287 = torch.constant.float 2.400000e+02 | |
%2398 = torch.aten.clamp %2397, %float-2.400000e02_2286, %float2.400000e02_2287 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2398, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2288 = torch.constant.int 26 | |
%2399 = torch.prims.convert_element_type %2398, %int26_2288 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2399, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_2289 = torch.constant.int -2 | |
%int-1_2290 = torch.constant.int -1 | |
%2400 = torch.aten.transpose.int %107, %int-2_2289, %int-1_2290 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_2291 = torch.constant.int 4096 | |
%2401 = torch.prim.ListConstruct %564, %int4096_2291 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2402 = torch.aten.view %2399, %2401 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2402, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2403 = torch.aten.mm %2402, %2400 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2403, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_2292 = torch.constant.int 1 | |
%int1024_2293 = torch.constant.int 1024 | |
%2404 = torch.prim.ListConstruct %int1_2292, %564, %int1024_2293 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2405 = torch.aten.view %2403, %2404 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2405, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_2294 = torch.constant.int 15 | |
%2406 = torch.prims.convert_element_type %2405, %int15_2294 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %2406, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
%2407 = torch.aten.div.Tensor %2386, %108 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2407, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2295 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2296 = torch.constant.float 2.400000e+02 | |
%2408 = torch.aten.clamp %2407, %float-2.400000e02_2295, %float2.400000e02_2296 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2408, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2297 = torch.constant.int 26 | |
%2409 = torch.prims.convert_element_type %2408, %int26_2297 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2409, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_2298 = torch.constant.int -2 | |
%int-1_2299 = torch.constant.int -1 | |
%2410 = torch.aten.transpose.int %109, %int-2_2298, %int-1_2299 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_2300 = torch.constant.int 4096 | |
%2411 = torch.prim.ListConstruct %564, %int4096_2300 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2412 = torch.aten.view %2409, %2411 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2412, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2413 = torch.aten.mm %2412, %2410 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2413, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_2301 = torch.constant.int 1 | |
%int1024_2302 = torch.constant.int 1024 | |
%2414 = torch.prim.ListConstruct %int1_2301, %564, %int1024_2302 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2415 = torch.aten.view %2413, %2414 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2415, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_2303 = torch.constant.int 15 | |
%2416 = torch.prims.convert_element_type %2415, %int15_2303 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %2416, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
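    // Annotation: Q (%105, 4096x4096), K (%107, 1024x4096) and V (%109, 1024x4096) projections for this block, each as quantize -> f8 matmul -> dequantize to bf16.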
%int1_2304 = torch.constant.int 1 | |
%int32_2305 = torch.constant.int 32 | |
%int128_2306 = torch.constant.int 128 | |
%2417 = torch.prim.ListConstruct %int1_2304, %564, %int32_2305, %int128_2306 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2418 = torch.aten.view %2396, %2417 : !torch.vtensor<[1,?,4096],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2418, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_2307 = torch.constant.int 1 | |
%int8_2308 = torch.constant.int 8 | |
%int128_2309 = torch.constant.int 128 | |
%2419 = torch.prim.ListConstruct %int1_2307, %564, %int8_2308, %int128_2309 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2420 = torch.aten.view %2406, %2419 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2420, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int1_2310 = torch.constant.int 1 | |
%int8_2311 = torch.constant.int 8 | |
%int128_2312 = torch.constant.int 128 | |
%2421 = torch.prim.ListConstruct %int1_2310, %564, %int8_2311, %int128_2312 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2422 = torch.aten.view %2416, %2421 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2422, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
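    // Annotation: reshape the projections into attention heads — Q to [1, seq, 32, 128], K and V to [1, seq, 8, 128].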
%int131072_2313 = torch.constant.int 131072 | |
%none_2314 = torch.constant.none | |
%none_2315 = torch.constant.none | |
%cpu_2316 = torch.constant.device "cpu" | |
%false_2317 = torch.constant.bool false | |
%2423 = torch.aten.arange %int131072_2313, %none_2314, %none_2315, %cpu_2316, %false_2317 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_2318 = torch.constant.int 0 | |
%int128_2319 = torch.constant.int 128 | |
%int2_2320 = torch.constant.int 2 | |
%int4_2321 = torch.constant.int 4 | |
%none_2322 = torch.constant.none | |
%cpu_2323 = torch.constant.device "cpu" | |
%false_2324 = torch.constant.bool false | |
%2424 = torch.aten.arange.start_step %int0_2318, %int128_2319, %int2_2320, %int4_2321, %none_2322, %cpu_2323, %false_2324 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_2325 = torch.constant.int 6 | |
%2425 = torch.prims.convert_element_type %2424, %int6_2325 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_2326 = torch.constant.int 128 | |
%2426 = torch.aten.div.Scalar %2425, %int128_2326 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_2327 = torch.constant.float 5.000000e+05 | |
%2427 = torch.aten.pow.Scalar %float5.000000e05_2327, %2426 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2428 = torch.aten.reciprocal %2427 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_2328 = torch.constant.float 1.000000e+00 | |
%2429 = torch.aten.mul.Scalar %2428, %float1.000000e00_2328 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%2430 = torch.aten.reciprocal %2429 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_2329 = torch.constant.float 6.2831853071795862 | |
%2431 = torch.aten.mul.Scalar %2430, %float6.283190e00_2329 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_2330 = torch.constant.float 8.192000e+03 | |
%2432 = torch.aten.gt.Scalar %2431, %float8.192000e03_2330 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_2331 = torch.constant.int 8 | |
%2433 = torch.aten.div.Scalar %2429, %int8_2331 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2434 = torch.aten.where.self %2432, %2433, %2429 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2435 = torch.aten.reciprocal %2431 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_2332 = torch.constant.int 8192 | |
%2436 = torch.aten.mul.Scalar %2435, %int8192_2332 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_2333 = torch.constant.int 1 | |
%int1_2334 = torch.constant.int 1 | |
%2437 = torch.aten.sub.Scalar %2436, %int1_2333, %int1_2334 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_2335 = torch.constant.int 3 | |
%2438 = torch.aten.div.Scalar %2437, %int3_2335 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_2336 = torch.constant.int 1 | |
%int1_2337 = torch.constant.int 1 | |
%2439 = torch.aten.rsub.Scalar %2438, %int1_2336, %int1_2337 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%2440 = torch.aten.mul.Tensor %2439, %2434 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_2338 = torch.constant.int 8 | |
%2441 = torch.aten.div.Scalar %2440, %int8_2338 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2442 = torch.aten.mul.Tensor %2438, %2434 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_2339 = torch.constant.int 1 | |
%2443 = torch.aten.add.Tensor %2441, %2442, %int1_2339 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_2340 = torch.constant.float 2.048000e+03 | |
%2444 = torch.aten.lt.Scalar %2431, %float2.048000e03_2340 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2445 = torch.aten.bitwise_not %2444 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_2341 = torch.constant.float 8.192000e+03 | |
%2446 = torch.aten.gt.Scalar %2431, %float8.192000e03_2341 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2447 = torch.aten.bitwise_not %2446 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2448 = torch.aten.mul.Tensor %2445, %2447 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2449 = torch.aten.where.self %2448, %2443, %2434 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2450 = torch.prim.ListConstruct %2449, %2449 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_2342 = torch.constant.int -1 | |
%2451 = torch.aten.cat %2450, %int-1_2342 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_2343 = torch.constant.int 6 | |
%2452 = torch.prims.convert_element_type %2423, %int6_2343 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_2344 = torch.constant.int 131072 | |
%int1_2345 = torch.constant.int 1 | |
%2453 = torch.prim.ListConstruct %int131072_2344, %int1_2345 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2454 = torch.aten.view %2452, %2453 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%2455 = torch.aten.mul.Tensor %2454, %2451 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%2456 = torch.aten.cos %2455 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_2346 = torch.constant.int 15 | |
%2457 = torch.prims.convert_element_type %2456, %int15_2346 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%2458 = torch.aten.sin %2455 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_2347 = torch.constant.int 15 | |
%2459 = torch.prims.convert_element_type %2458, %int15_2347 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
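    // Annotation: build rotary-embedding cos/sin tables for up to 131072 positions with base 500000; the wavelength-dependent rescaling (factor 8, bounds 2048 / 8192) appears to be the Llama-3-style long-context RoPE scheme.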
%int1_2348 = torch.constant.int 1 | |
%2460 = torch.aten.size.int %2395, %int1_2348 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_2349 = torch.constant.int 0 | |
%2461 = torch.aten.add.int %int0_2349, %2460 : !torch.int, !torch.int -> !torch.int | |
%int0_2350 = torch.constant.int 0 | |
%int0_2351 = torch.constant.int 0 | |
%int1_2352 = torch.constant.int 1 | |
%2462 = torch.aten.slice.Tensor %2457, %int0_2350, %int0_2351, %2461, %int1_2352 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2462, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_2353 = torch.constant.int 1 | |
%int0_2354 = torch.constant.int 0 | |
%int9223372036854775807_2355 = torch.constant.int 9223372036854775807 | |
%int1_2356 = torch.constant.int 1 | |
%2463 = torch.aten.slice.Tensor %2462, %int1_2353, %int0_2354, %int9223372036854775807_2355, %int1_2356 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2463, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_2357 = torch.constant.int 0 | |
%2464 = torch.aten.add.int %int0_2357, %2460 : !torch.int, !torch.int -> !torch.int | |
%int0_2358 = torch.constant.int 0 | |
%int0_2359 = torch.constant.int 0 | |
%int1_2360 = torch.constant.int 1 | |
%2465 = torch.aten.slice.Tensor %2459, %int0_2358, %int0_2359, %2464, %int1_2360 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2465, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_2361 = torch.constant.int 1 | |
%int0_2362 = torch.constant.int 0 | |
%int9223372036854775807_2363 = torch.constant.int 9223372036854775807 | |
%int1_2364 = torch.constant.int 1 | |
%2466 = torch.aten.slice.Tensor %2465, %int1_2361, %int0_2362, %int9223372036854775807_2363, %int1_2364 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2466, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_2365 = torch.constant.int 0 | |
%2467 = torch.aten.unsqueeze %2463, %int0_2365 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2467, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_2366 = torch.constant.int 1 | |
%int0_2367 = torch.constant.int 0 | |
%int9223372036854775807_2368 = torch.constant.int 9223372036854775807 | |
%int1_2369 = torch.constant.int 1 | |
%2468 = torch.aten.slice.Tensor %2467, %int1_2366, %int0_2367, %int9223372036854775807_2368, %int1_2369 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2468, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_2370 = torch.constant.int 2 | |
%2469 = torch.aten.unsqueeze %2468, %int2_2370 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2469, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_2371 = torch.constant.int 3 | |
%int0_2372 = torch.constant.int 0 | |
%int9223372036854775807_2373 = torch.constant.int 9223372036854775807 | |
%int1_2374 = torch.constant.int 1 | |
%2470 = torch.aten.slice.Tensor %2469, %int3_2371, %int0_2372, %int9223372036854775807_2373, %int1_2374 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2470, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_2375 = torch.constant.int 0 | |
%2471 = torch.aten.unsqueeze %2466, %int0_2375 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2471, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_2376 = torch.constant.int 1 | |
%int0_2377 = torch.constant.int 0 | |
%int9223372036854775807_2378 = torch.constant.int 9223372036854775807 | |
%int1_2379 = torch.constant.int 1 | |
%2472 = torch.aten.slice.Tensor %2471, %int1_2376, %int0_2377, %int9223372036854775807_2378, %int1_2379 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2472, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_2380 = torch.constant.int 2 | |
%2473 = torch.aten.unsqueeze %2472, %int2_2380 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2473, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_2381 = torch.constant.int 3 | |
%int0_2382 = torch.constant.int 0 | |
%int9223372036854775807_2383 = torch.constant.int 9223372036854775807 | |
%int1_2384 = torch.constant.int 1 | |
%2474 = torch.aten.slice.Tensor %2473, %int3_2381, %int0_2382, %int9223372036854775807_2383, %int1_2384 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2474, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_2385 = torch.constant.int 1 | |
%int2_2386 = torch.constant.int 2 | |
%2475 = torch.aten.transpose.int %2470, %int1_2385, %int2_2386 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2475, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2387 = torch.constant.int 1 | |
%int1_2388 = torch.constant.int 1 | |
%int1_2389 = torch.constant.int 1 | |
%int1_2390 = torch.constant.int 1 | |
%2476 = torch.prim.ListConstruct %int1_2387, %int1_2388, %int1_2389, %int1_2390 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2477 = torch.aten.repeat %2475, %2476 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2477, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2391 = torch.constant.int 1 | |
%int2_2392 = torch.constant.int 2 | |
%2478 = torch.aten.transpose.int %2474, %int1_2391, %int2_2392 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2478, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2393 = torch.constant.int 1 | |
%int2_2394 = torch.constant.int 2 | |
%2479 = torch.aten.transpose.int %2418, %int1_2393, %int2_2394 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2479, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2395 = torch.constant.int 1 | |
%int1_2396 = torch.constant.int 1 | |
%int1_2397 = torch.constant.int 1 | |
%int1_2398 = torch.constant.int 1 | |
%2480 = torch.prim.ListConstruct %int1_2395, %int1_2396, %int1_2397, %int1_2398 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2481 = torch.aten.repeat %2478, %2480 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2481, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%2482 = torch.aten.mul.Tensor %2479, %2477 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2482, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int3_2399 = torch.constant.int 3 | |
%int0_2400 = torch.constant.int 0 | |
%int64_2401 = torch.constant.int 64 | |
%int1_2402 = torch.constant.int 1 | |
%2483 = torch.aten.slice.Tensor %2479, %int3_2399, %int0_2400, %int64_2401, %int1_2402 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %2483, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%int3_2403 = torch.constant.int 3 | |
%int64_2404 = torch.constant.int 64 | |
%int9223372036854775807_2405 = torch.constant.int 9223372036854775807 | |
%int1_2406 = torch.constant.int 1 | |
%2484 = torch.aten.slice.Tensor %2479, %int3_2403, %int64_2404, %int9223372036854775807_2405, %int1_2406 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %2484, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%2485 = torch.aten.neg %2484 : !torch.vtensor<[1,32,?,64],bf16> -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %2485, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%2486 = torch.prim.ListConstruct %2485, %2483 : (!torch.vtensor<[1,32,?,64],bf16>, !torch.vtensor<[1,32,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_2407 = torch.constant.int -1 | |
%2487 = torch.aten.cat %2486, %int-1_2407 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2487, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%2488 = torch.aten.mul.Tensor %2487, %2481 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2488, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2408 = torch.constant.int 1 | |
%2489 = torch.aten.add.Tensor %2482, %2488, %int1_2408 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2489, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2409 = torch.constant.int 1 | |
%int2_2410 = torch.constant.int 2 | |
%2490 = torch.aten.transpose.int %2489, %int1_2409, %int2_2410 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2490, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
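    // Annotation: apply rotary position embedding to Q using the rotate-half formulation — q * cos + rotate_half(q) * sin, where rotate_half negates the upper 64 lanes and concatenates them ahead of the lower 64.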
%int131072_2411 = torch.constant.int 131072 | |
%none_2412 = torch.constant.none | |
%none_2413 = torch.constant.none | |
%cpu_2414 = torch.constant.device "cpu" | |
%false_2415 = torch.constant.bool false | |
%2491 = torch.aten.arange %int131072_2411, %none_2412, %none_2413, %cpu_2414, %false_2415 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_2416 = torch.constant.int 0 | |
%int128_2417 = torch.constant.int 128 | |
%int2_2418 = torch.constant.int 2 | |
%int4_2419 = torch.constant.int 4 | |
%none_2420 = torch.constant.none | |
%cpu_2421 = torch.constant.device "cpu" | |
%false_2422 = torch.constant.bool false | |
%2492 = torch.aten.arange.start_step %int0_2416, %int128_2417, %int2_2418, %int4_2419, %none_2420, %cpu_2421, %false_2422 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_2423 = torch.constant.int 6 | |
%2493 = torch.prims.convert_element_type %2492, %int6_2423 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_2424 = torch.constant.int 128 | |
%2494 = torch.aten.div.Scalar %2493, %int128_2424 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_2425 = torch.constant.float 5.000000e+05 | |
%2495 = torch.aten.pow.Scalar %float5.000000e05_2425, %2494 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2496 = torch.aten.reciprocal %2495 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_2426 = torch.constant.float 1.000000e+00 | |
%2497 = torch.aten.mul.Scalar %2496, %float1.000000e00_2426 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%2498 = torch.aten.reciprocal %2497 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_2427 = torch.constant.float 6.2831853071795862 | |
%2499 = torch.aten.mul.Scalar %2498, %float6.283190e00_2427 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_2428 = torch.constant.float 8.192000e+03 | |
%2500 = torch.aten.gt.Scalar %2499, %float8.192000e03_2428 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_2429 = torch.constant.int 8 | |
%2501 = torch.aten.div.Scalar %2497, %int8_2429 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2502 = torch.aten.where.self %2500, %2501, %2497 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2503 = torch.aten.reciprocal %2499 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_2430 = torch.constant.int 8192 | |
%2504 = torch.aten.mul.Scalar %2503, %int8192_2430 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_2431 = torch.constant.int 1 | |
%int1_2432 = torch.constant.int 1 | |
%2505 = torch.aten.sub.Scalar %2504, %int1_2431, %int1_2432 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_2433 = torch.constant.int 3 | |
%2506 = torch.aten.div.Scalar %2505, %int3_2433 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_2434 = torch.constant.int 1 | |
%int1_2435 = torch.constant.int 1 | |
%2507 = torch.aten.rsub.Scalar %2506, %int1_2434, %int1_2435 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%2508 = torch.aten.mul.Tensor %2507, %2502 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_2436 = torch.constant.int 8 | |
%2509 = torch.aten.div.Scalar %2508, %int8_2436 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2510 = torch.aten.mul.Tensor %2506, %2502 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_2437 = torch.constant.int 1 | |
%2511 = torch.aten.add.Tensor %2509, %2510, %int1_2437 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_2438 = torch.constant.float 2.048000e+03 | |
%2512 = torch.aten.lt.Scalar %2499, %float2.048000e03_2438 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2513 = torch.aten.bitwise_not %2512 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_2439 = torch.constant.float 8.192000e+03 | |
%2514 = torch.aten.gt.Scalar %2499, %float8.192000e03_2439 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2515 = torch.aten.bitwise_not %2514 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2516 = torch.aten.mul.Tensor %2513, %2515 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2517 = torch.aten.where.self %2516, %2511, %2502 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2518 = torch.prim.ListConstruct %2517, %2517 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_2440 = torch.constant.int -1 | |
%2519 = torch.aten.cat %2518, %int-1_2440 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_2441 = torch.constant.int 6 | |
%2520 = torch.prims.convert_element_type %2491, %int6_2441 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_2442 = torch.constant.int 131072 | |
%int1_2443 = torch.constant.int 1 | |
%2521 = torch.prim.ListConstruct %int131072_2442, %int1_2443 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2522 = torch.aten.view %2520, %2521 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%2523 = torch.aten.mul.Tensor %2522, %2519 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%2524 = torch.aten.cos %2523 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_2444 = torch.constant.int 15 | |
%2525 = torch.prims.convert_element_type %2524, %int15_2444 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%2526 = torch.aten.sin %2523 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_2445 = torch.constant.int 15 | |
%2527 = torch.prims.convert_element_type %2526, %int15_2445 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
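    // Annotation: the same cos/sin table construction is repeated for the K path; the slices below select the rows for the current sequence length before applying the rotation to K.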
%int1_2446 = torch.constant.int 1 | |
%2528 = torch.aten.size.int %2405, %int1_2446 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_2447 = torch.constant.int 0 | |
%2529 = torch.aten.add.int %int0_2447, %2528 : !torch.int, !torch.int -> !torch.int | |
%int0_2448 = torch.constant.int 0 | |
%int0_2449 = torch.constant.int 0 | |
%int1_2450 = torch.constant.int 1 | |
%2530 = torch.aten.slice.Tensor %2525, %int0_2448, %int0_2449, %2529, %int1_2450 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2530, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_2451 = torch.constant.int 1 | |
%int0_2452 = torch.constant.int 0 | |
%int9223372036854775807_2453 = torch.constant.int 9223372036854775807 | |
%int1_2454 = torch.constant.int 1 | |
%2531 = torch.aten.slice.Tensor %2530, %int1_2451, %int0_2452, %int9223372036854775807_2453, %int1_2454 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2531, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_2455 = torch.constant.int 0 | |
%2532 = torch.aten.add.int %int0_2455, %2528 : !torch.int, !torch.int -> !torch.int | |
%int0_2456 = torch.constant.int 0 | |
%int0_2457 = torch.constant.int 0 | |
%int1_2458 = torch.constant.int 1 | |
%2533 = torch.aten.slice.Tensor %2527, %int0_2456, %int0_2457, %2532, %int1_2458 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2533, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_2459 = torch.constant.int 1 | |
%int0_2460 = torch.constant.int 0 | |
%int9223372036854775807_2461 = torch.constant.int 9223372036854775807 | |
%int1_2462 = torch.constant.int 1 | |
%2534 = torch.aten.slice.Tensor %2533, %int1_2459, %int0_2460, %int9223372036854775807_2461, %int1_2462 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2534, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
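// Reshape cos/sin to (1, seq, 1, 128) and transpose to (1, 1, seq, 128) so they broadcast across the 8 KV heads.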
%int0_2463 = torch.constant.int 0 | |
%2535 = torch.aten.unsqueeze %2531, %int0_2463 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2535, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_2464 = torch.constant.int 1 | |
%int0_2465 = torch.constant.int 0 | |
%int9223372036854775807_2466 = torch.constant.int 9223372036854775807 | |
%int1_2467 = torch.constant.int 1 | |
%2536 = torch.aten.slice.Tensor %2535, %int1_2464, %int0_2465, %int9223372036854775807_2466, %int1_2467 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2536, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_2468 = torch.constant.int 2 | |
%2537 = torch.aten.unsqueeze %2536, %int2_2468 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2537, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_2469 = torch.constant.int 3 | |
%int0_2470 = torch.constant.int 0 | |
%int9223372036854775807_2471 = torch.constant.int 9223372036854775807 | |
%int1_2472 = torch.constant.int 1 | |
%2538 = torch.aten.slice.Tensor %2537, %int3_2469, %int0_2470, %int9223372036854775807_2471, %int1_2472 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2538, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_2473 = torch.constant.int 0 | |
%2539 = torch.aten.unsqueeze %2534, %int0_2473 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2539, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_2474 = torch.constant.int 1 | |
%int0_2475 = torch.constant.int 0 | |
%int9223372036854775807_2476 = torch.constant.int 9223372036854775807 | |
%int1_2477 = torch.constant.int 1 | |
%2540 = torch.aten.slice.Tensor %2539, %int1_2474, %int0_2475, %int9223372036854775807_2476, %int1_2477 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2540, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_2478 = torch.constant.int 2 | |
%2541 = torch.aten.unsqueeze %2540, %int2_2478 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2541, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_2479 = torch.constant.int 3 | |
%int0_2480 = torch.constant.int 0 | |
%int9223372036854775807_2481 = torch.constant.int 9223372036854775807 | |
%int1_2482 = torch.constant.int 1 | |
%2542 = torch.aten.slice.Tensor %2541, %int3_2479, %int0_2480, %int9223372036854775807_2481, %int1_2482 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2542, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_2483 = torch.constant.int 1 | |
%int2_2484 = torch.constant.int 2 | |
%2543 = torch.aten.transpose.int %2538, %int1_2483, %int2_2484 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2543, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2485 = torch.constant.int 1 | |
%int1_2486 = torch.constant.int 1 | |
%int1_2487 = torch.constant.int 1 | |
%int1_2488 = torch.constant.int 1 | |
%2544 = torch.prim.ListConstruct %int1_2485, %int1_2486, %int1_2487, %int1_2488 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2545 = torch.aten.repeat %2543, %2544 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2545, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2489 = torch.constant.int 1 | |
%int2_2490 = torch.constant.int 2 | |
%2546 = torch.aten.transpose.int %2542, %int1_2489, %int2_2490 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2546, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2491 = torch.constant.int 1 | |
%int2_2492 = torch.constant.int 2 | |
%2547 = torch.aten.transpose.int %2420, %int1_2491, %int2_2492 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2547, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_2493 = torch.constant.int 1 | |
%int1_2494 = torch.constant.int 1 | |
%int1_2495 = torch.constant.int 1 | |
%int1_2496 = torch.constant.int 1 | |
%2548 = torch.prim.ListConstruct %int1_2493, %int1_2494, %int1_2495, %int1_2496 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2549 = torch.aten.repeat %2546, %2548 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2549, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
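// Apply rotary position embedding to the key states: K * cos + rotate_half(K) * sin.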
%2550 = torch.aten.mul.Tensor %2547, %2545 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2550, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int3_2497 = torch.constant.int 3 | |
%int0_2498 = torch.constant.int 0 | |
%int64_2499 = torch.constant.int 64 | |
%int1_2500 = torch.constant.int 1 | |
%2551 = torch.aten.slice.Tensor %2547, %int3_2497, %int0_2498, %int64_2499, %int1_2500 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %2551, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%int3_2501 = torch.constant.int 3 | |
%int64_2502 = torch.constant.int 64 | |
%int9223372036854775807_2503 = torch.constant.int 9223372036854775807 | |
%int1_2504 = torch.constant.int 1 | |
%2552 = torch.aten.slice.Tensor %2547, %int3_2501, %int64_2502, %int9223372036854775807_2503, %int1_2504 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %2552, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%2553 = torch.aten.neg %2552 : !torch.vtensor<[1,8,?,64],bf16> -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %2553, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%2554 = torch.prim.ListConstruct %2553, %2551 : (!torch.vtensor<[1,8,?,64],bf16>, !torch.vtensor<[1,8,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_2505 = torch.constant.int -1 | |
%2555 = torch.aten.cat %2554, %int-1_2505 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2555, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%2556 = torch.aten.mul.Tensor %2555, %2549 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2556, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_2506 = torch.constant.int 1 | |
%2557 = torch.aten.add.Tensor %2550, %2556, %int1_2506 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,8,?,128],bf16>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2557, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_2507 = torch.constant.int 1 | |
%int2_2508 = torch.constant.int 2 | |
%2558 = torch.aten.transpose.int %2557, %int1_2507, %int2_2508 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2558, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
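// Quantize the rotated K and the V states for the KV cache: divide by the cache scale, clamp to the f8E4M3FNUZ range (+/-240), and cast.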
%2559 = torch.aten.div.Tensor %2558, %110 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2559, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_2509 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2510 = torch.constant.float 2.400000e+02 | |
%2560 = torch.aten.clamp %2559, %float-2.400000e02_2509, %float2.400000e02_2510 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2560, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_2511 = torch.constant.int 26 | |
%2561 = torch.prims.convert_element_type %2560, %int26_2511 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2561, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
%2562 = torch.aten.div.Tensor %2422, %110 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2562, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_2512 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2513 = torch.constant.float 2.400000e+02 | |
%2563 = torch.aten.clamp %2562, %float-2.400000e02_2512, %float2.400000e02_2513 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2563, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_2514 = torch.constant.int 26 | |
%2564 = torch.prims.convert_element_type %2563, %int26_2514 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2564, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
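// Compute KV-cache slot indices (page index * 64 plus what appears to be this block's K slot offset) and scatter the quantized K states into the paged cache with index_put.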
%int64_2515 = torch.constant.int 64 | |
%2565 = torch.aten.mul.Scalar %arg2, %int64_2515 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %2565, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int12 = torch.constant.int 12 | |
%int1_2516 = torch.constant.int 1 | |
%2566 = torch.aten.add.Scalar %2565, %int12, %int1_2516 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %2566, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int1_2517 = torch.constant.int 1 | |
%int32_2518 = torch.constant.int 32 | |
%int8_2519 = torch.constant.int 8 | |
%int128_2520 = torch.constant.int 128 | |
%2567 = torch.prim.ListConstruct %int1_2517, %748, %int32_2518, %int8_2519, %int128_2520 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2568 = torch.aten.view %2561, %2567 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2568, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_2521 = torch.constant.int 32 | |
%int8_2522 = torch.constant.int 8 | |
%int128_2523 = torch.constant.int 128 | |
%2569 = torch.prim.ListConstruct %748, %int32_2521, %int8_2522, %int128_2523 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2570 = torch.aten.view %2568, %2569 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2570, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%2571 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%2572 = torch.aten.view %2566, %2571 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %2572, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%int32_2524 = torch.constant.int 32 | |
%int2_2525 = torch.constant.int 2 | |
%int32_2526 = torch.constant.int 32 | |
%int8_2527 = torch.constant.int 8 | |
%int128_2528 = torch.constant.int 128 | |
%2573 = torch.prim.ListConstruct %739, %int32_2524, %int2_2525, %int32_2526, %int8_2527, %int128_2528 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2574 = torch.aten.view %2297, %2573 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2574, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_2529 = torch.constant.int 32 | |
%2575 = torch.aten.mul.int %739, %int32_2529 : !torch.int, !torch.int -> !torch.int | |
%int2_2530 = torch.constant.int 2 | |
%2576 = torch.aten.mul.int %2575, %int2_2530 : !torch.int, !torch.int -> !torch.int | |
%int32_2531 = torch.constant.int 32 | |
%int8_2532 = torch.constant.int 8 | |
%int128_2533 = torch.constant.int 128 | |
%2577 = torch.prim.ListConstruct %2576, %int32_2531, %int8_2532, %int128_2533 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2578 = torch.aten.view %2574, %2577 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2578, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%2579 = torch.prim.ListConstruct %2572 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_2534 = torch.constant.bool false | |
%2580 = torch.aten.index_put %2578, %2579, %2570, %false_2534 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2580, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_2535 = torch.constant.int 32 | |
%int2_2536 = torch.constant.int 2 | |
%int32_2537 = torch.constant.int 32 | |
%int8_2538 = torch.constant.int 8 | |
%int128_2539 = torch.constant.int 128 | |
%2581 = torch.prim.ListConstruct %739, %int32_2535, %int2_2536, %int32_2537, %int8_2538, %int128_2539 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2582 = torch.aten.view %2580, %2581 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2582, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_2540 = torch.constant.int 2097152 | |
%2583 = torch.prim.ListConstruct %739, %int2097152_2540 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2584 = torch.aten.view %2582, %2583 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2584, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
%int32_2541 = torch.constant.int 32 | |
%int2_2542 = torch.constant.int 2 | |
%int32_2543 = torch.constant.int 32 | |
%int8_2544 = torch.constant.int 8 | |
%int128_2545 = torch.constant.int 128 | |
%2585 = torch.prim.ListConstruct %739, %int32_2541, %int2_2542, %int32_2543, %int8_2544, %int128_2545 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2586 = torch.aten.view %2584, %2585 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2586, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_2546 = torch.constant.int 32 | |
%int8_2547 = torch.constant.int 8 | |
%int128_2548 = torch.constant.int 128 | |
%2587 = torch.prim.ListConstruct %2576, %int32_2546, %int8_2547, %int128_2548 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2588 = torch.aten.view %2586, %2587 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2588, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
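// Scatter the quantized V states into the adjacent slot (K index + 1), then flatten the cache back to its (pages, 2097152) layout.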
%int1_2549 = torch.constant.int 1 | |
%int32_2550 = torch.constant.int 32 | |
%int8_2551 = torch.constant.int 8 | |
%int128_2552 = torch.constant.int 128 | |
%2589 = torch.prim.ListConstruct %int1_2549, %748, %int32_2550, %int8_2551, %int128_2552 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2590 = torch.aten.view %2564, %2589 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2590, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_2553 = torch.constant.int 32 | |
%int8_2554 = torch.constant.int 8 | |
%int128_2555 = torch.constant.int 128 | |
%2591 = torch.prim.ListConstruct %748, %int32_2553, %int8_2554, %int128_2555 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2592 = torch.aten.view %2590, %2591 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2592, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_2556 = torch.constant.int 1 | |
%int1_2557 = torch.constant.int 1 | |
%2593 = torch.aten.add.Scalar %2566, %int1_2556, %int1_2557 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %2593, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%2594 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%2595 = torch.aten.view %2593, %2594 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %2595, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%2596 = torch.prim.ListConstruct %2595 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_2558 = torch.constant.bool false | |
%2597 = torch.aten.index_put %2588, %2596, %2592, %false_2558 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2597, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_2559 = torch.constant.int 32 | |
%int2_2560 = torch.constant.int 2 | |
%int32_2561 = torch.constant.int 32 | |
%int8_2562 = torch.constant.int 8 | |
%int128_2563 = torch.constant.int 128 | |
%2598 = torch.prim.ListConstruct %739, %int32_2559, %int2_2560, %int32_2561, %int8_2562, %int128_2563 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2599 = torch.aten.view %2597, %2598 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2599, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_2564 = torch.constant.int 2097152 | |
%2600 = torch.prim.ListConstruct %739, %int2097152_2564 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2601 = torch.aten.view %2599, %2600 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2601, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
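// Grouped-query attention: expand the 8 KV heads to 32 by repeating each head 4 times, then dequantize back to bf16 by multiplying with the cache scale.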
%int-2_2565 = torch.constant.int -2 | |
%2602 = torch.aten.unsqueeze %2561, %int-2_2565 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2602, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_2566 = torch.constant.int 1 | |
%int8_2567 = torch.constant.int 8 | |
%int4_2568 = torch.constant.int 4 | |
%int128_2569 = torch.constant.int 128 | |
%2603 = torch.prim.ListConstruct %int1_2566, %2528, %int8_2567, %int4_2568, %int128_2569 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_2570 = torch.constant.bool false | |
%2604 = torch.aten.expand %2602, %2603, %false_2570 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2604, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_2571 = torch.constant.int 0 | |
%2605 = torch.aten.clone %2604, %int0_2571 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2605, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_2572 = torch.constant.int 1 | |
%int32_2573 = torch.constant.int 32 | |
%int128_2574 = torch.constant.int 128 | |
%2606 = torch.prim.ListConstruct %int1_2572, %2528, %int32_2573, %int128_2574 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2607 = torch.aten._unsafe_view %2605, %2606 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2607, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
%int-2_2575 = torch.constant.int -2 | |
%2608 = torch.aten.unsqueeze %2564, %int-2_2575 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2608, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_2576 = torch.constant.int 1 | |
%2609 = torch.aten.size.int %2415, %int1_2576 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int1_2577 = torch.constant.int 1 | |
%int8_2578 = torch.constant.int 8 | |
%int4_2579 = torch.constant.int 4 | |
%int128_2580 = torch.constant.int 128 | |
%2610 = torch.prim.ListConstruct %int1_2577, %2609, %int8_2578, %int4_2579, %int128_2580 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_2581 = torch.constant.bool false | |
%2611 = torch.aten.expand %2608, %2610, %false_2581 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2611, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_2582 = torch.constant.int 0 | |
%2612 = torch.aten.clone %2611, %int0_2582 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2612, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_2583 = torch.constant.int 1 | |
%int32_2584 = torch.constant.int 32 | |
%int128_2585 = torch.constant.int 128 | |
%2613 = torch.prim.ListConstruct %int1_2583, %2609, %int32_2584, %int128_2585 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2614 = torch.aten._unsafe_view %2612, %2613 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2614, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
%int6_2586 = torch.constant.int 6 | |
%2615 = torch.prims.convert_element_type %2607, %int6_2586 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2615, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%2616 = torch.aten.mul.Tensor %2615, %110 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2616, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_2587 = torch.constant.int 15 | |
%2617 = torch.prims.convert_element_type %2616, %int15_2587 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2617, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int6_2588 = torch.constant.int 6 | |
%2618 = torch.prims.convert_element_type %2614, %int6_2588 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2618, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%2619 = torch.aten.mul.Tensor %2618, %110 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2619, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_2589 = torch.constant.int 15 | |
%2620 = torch.prims.convert_element_type %2619, %int15_2589 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2620, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
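// Transpose Q, K, V to (1, heads, seq, 128) and run causal scaled-dot-product flash attention (dropout 0, is_causal = true).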
%int1_2590 = torch.constant.int 1 | |
%int2_2591 = torch.constant.int 2 | |
%2621 = torch.aten.transpose.int %2490, %int1_2590, %int2_2591 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2621, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2592 = torch.constant.int 1 | |
%int2_2593 = torch.constant.int 2 | |
%2622 = torch.aten.transpose.int %2617, %int1_2592, %int2_2593 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2622, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2594 = torch.constant.int 1 | |
%int2_2595 = torch.constant.int 2 | |
%2623 = torch.aten.transpose.int %2620, %int1_2594, %int2_2595 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2623, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%float0.000000e00_2596 = torch.constant.float 0.000000e+00 | |
%true_2597 = torch.constant.bool true | |
%none_2598 = torch.constant.none | |
%none_2599 = torch.constant.none | |
%2624:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%2621, %2622, %2623, %float0.000000e00_2596, %true_2597, %none_2598, %none_2599) : (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?],f32>) | |
torch.bind_symbolic_shape %2624#0, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
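// Transpose the attention output back, flatten the heads to 4096, quantize to f8, apply the attention output projection, dequantize, and add the residual.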
%int1_2600 = torch.constant.int 1 | |
%int2_2601 = torch.constant.int 2 | |
%2625 = torch.aten.transpose.int %2624#0, %int1_2600, %int2_2601 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2625, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_2602 = torch.constant.int 1 | |
%int4096_2603 = torch.constant.int 4096 | |
%2626 = torch.prim.ListConstruct %int1_2602, %2460, %int4096_2603 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2627 = torch.aten.view %2625, %2626 : !torch.vtensor<[1,?,32,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2627, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2628 = torch.aten.div.Tensor %2627, %111 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2628, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2604 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2605 = torch.constant.float 2.400000e+02 | |
%2629 = torch.aten.clamp %2628, %float-2.400000e02_2604, %float2.400000e02_2605 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2629, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2606 = torch.constant.int 26 | |
%2630 = torch.prims.convert_element_type %2629, %int26_2606 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2630, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_2607 = torch.constant.int -2 | |
%int-1_2608 = torch.constant.int -1 | |
%2631 = torch.aten.transpose.int %112, %int-2_2607, %int-1_2608 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_2609 = torch.constant.int 4096 | |
%2632 = torch.prim.ListConstruct %2460, %int4096_2609 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2633 = torch.aten.view %2630, %2632 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2633, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2634 = torch.aten.mm %2633, %2631 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2634, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_2610 = torch.constant.int 1 | |
%int4096_2611 = torch.constant.int 4096 | |
%2635 = torch.prim.ListConstruct %int1_2610, %2460, %int4096_2611 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2636 = torch.aten.view %2634, %2635 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2636, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_2612 = torch.constant.int 15 | |
%2637 = torch.prims.convert_element_type %2636, %int15_2612 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2637, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_2613 = torch.constant.int 1 | |
%2638 = torch.aten.add.Tensor %2377, %2637, %int1_2613 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2638, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
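// RMSNorm of the residual stream (mean of squares + 1e-5, rsqrt), scale by the ffn_norm weight, then quantize the result to f8 for the FFN matmuls.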
%int6_2614 = torch.constant.int 6 | |
%2639 = torch.prims.convert_element_type %2638, %int6_2614 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2639, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_2615 = torch.constant.int 2 | |
%2640 = torch.aten.pow.Tensor_Scalar %2639, %int2_2615 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2640, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_2616 = torch.constant.int -1 | |
%2641 = torch.prim.ListConstruct %int-1_2616 : (!torch.int) -> !torch.list<int> | |
%true_2617 = torch.constant.bool true | |
%none_2618 = torch.constant.none | |
%2642 = torch.aten.mean.dim %2640, %2641, %true_2617, %none_2618 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2642, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_2619 = torch.constant.float 1.000000e-05 | |
%int1_2620 = torch.constant.int 1 | |
%2643 = torch.aten.add.Scalar %2642, %float1.000000e-05_2619, %int1_2620 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2643, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2644 = torch.aten.rsqrt %2643 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2644, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2645 = torch.aten.mul.Tensor %2639, %2644 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2645, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_2621 = torch.constant.int 15 | |
%2646 = torch.prims.convert_element_type %2645, %int15_2621 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2646, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2647 = torch.aten.mul.Tensor %113, %2646 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2647, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2648 = torch.aten.div.Tensor %2647, %114 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2648, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2622 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2623 = torch.constant.float 2.400000e+02 | |
%2649 = torch.aten.clamp %2648, %float-2.400000e02_2622, %float2.400000e02_2623 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2649, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2624 = torch.constant.int 26 | |
%2650 = torch.prims.convert_element_type %2649, %int26_2624 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2650, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
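// FFN gate projection (4096 -> 14336) as an f8 matmul, dequantized to bf16 and passed through SiLU.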
%int-2_2625 = torch.constant.int -2 | |
%int-1_2626 = torch.constant.int -1 | |
%2651 = torch.aten.transpose.int %115, %int-2_2625, %int-1_2626 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_2627 = torch.constant.int 4096 | |
%2652 = torch.prim.ListConstruct %564, %int4096_2627 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2653 = torch.aten.view %2650, %2652 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2653, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2654 = torch.aten.mm %2653, %2651 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2654, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_2628 = torch.constant.int 1 | |
%int14336_2629 = torch.constant.int 14336 | |
%2655 = torch.prim.ListConstruct %int1_2628, %564, %int14336_2629 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2656 = torch.aten.view %2654, %2655 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2656, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_2630 = torch.constant.int 15 | |
%2657 = torch.prims.convert_element_type %2656, %int15_2630 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2657, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%2658 = torch.aten.silu %2657 : !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2658, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
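// FFN up projection (4096 -> 14336), then elementwise multiply with the gated activations (SwiGLU).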
%2659 = torch.aten.div.Tensor %2647, %116 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2659, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2631 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2632 = torch.constant.float 2.400000e+02 | |
%2660 = torch.aten.clamp %2659, %float-2.400000e02_2631, %float2.400000e02_2632 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2660, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2633 = torch.constant.int 26 | |
%2661 = torch.prims.convert_element_type %2660, %int26_2633 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2661, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_2634 = torch.constant.int -2 | |
%int-1_2635 = torch.constant.int -1 | |
%2662 = torch.aten.transpose.int %117, %int-2_2634, %int-1_2635 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_2636 = torch.constant.int 4096 | |
%2663 = torch.prim.ListConstruct %564, %int4096_2636 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2664 = torch.aten.view %2661, %2663 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2664, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2665 = torch.aten.mm %2664, %2662 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2665, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_2637 = torch.constant.int 1 | |
%int14336_2638 = torch.constant.int 14336 | |
%2666 = torch.prim.ListConstruct %int1_2637, %564, %int14336_2638 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2667 = torch.aten.view %2665, %2666 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2667, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_2639 = torch.constant.int 15 | |
%2668 = torch.prims.convert_element_type %2667, %int15_2639 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2668, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%2669 = torch.aten.mul.Tensor %2658, %2668 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2669, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
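// Quantize the gated product, apply the FFN down projection (14336 -> 4096), dequantize, and add the residual to close out this transformer block.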
%2670 = torch.aten.div.Tensor %2669, %118 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2670, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%float-2.400000e02_2640 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2641 = torch.constant.float 2.400000e+02 | |
%2671 = torch.aten.clamp %2670, %float-2.400000e02_2640, %float2.400000e02_2641 : !torch.vtensor<[1,?,14336],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2671, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%int26_2642 = torch.constant.int 26 | |
%2672 = torch.prims.convert_element_type %2671, %int26_2642 : !torch.vtensor<[1,?,14336],bf16>, !torch.int -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2672, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int-2_2643 = torch.constant.int -2 | |
%int-1_2644 = torch.constant.int -1 | |
%2673 = torch.aten.transpose.int %119, %int-2_2643, %int-1_2644 : !torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%int1_2645 = torch.constant.int 1 | |
%2674 = torch.aten.size.int %2656, %int1_2645 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int14336_2646 = torch.constant.int 14336 | |
%2675 = torch.prim.ListConstruct %2674, %int14336_2646 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2676 = torch.aten.view %2672, %2675 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2676, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%2677 = torch.aten.mm %2676, %2673 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.vtensor<[14336,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2677, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_2647 = torch.constant.int 1 | |
%int4096_2648 = torch.constant.int 4096 | |
%2678 = torch.prim.ListConstruct %int1_2647, %2674, %int4096_2648 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2679 = torch.aten.view %2677, %2678 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2679, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_2649 = torch.constant.int 15 | |
%2680 = torch.prims.convert_element_type %2679, %int15_2649 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2680, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int1_2650 = torch.constant.int 1 | |
%2681 = torch.aten.add.Tensor %2638, %2680, %int1_2650 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2681, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
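// Start of the next block: RMSNorm with its attn_norm weight, then quantize to f8 for the attention projections.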
%int6_2651 = torch.constant.int 6 | |
%2682 = torch.prims.convert_element_type %2681, %int6_2651 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2682, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_2652 = torch.constant.int 2 | |
%2683 = torch.aten.pow.Tensor_Scalar %2682, %int2_2652 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2683, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_2653 = torch.constant.int -1 | |
%2684 = torch.prim.ListConstruct %int-1_2653 : (!torch.int) -> !torch.list<int> | |
%true_2654 = torch.constant.bool true | |
%none_2655 = torch.constant.none | |
%2685 = torch.aten.mean.dim %2683, %2684, %true_2654, %none_2655 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2685, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_2656 = torch.constant.float 1.000000e-05 | |
%int1_2657 = torch.constant.int 1 | |
%2686 = torch.aten.add.Scalar %2685, %float1.000000e-05_2656, %int1_2657 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2686, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2687 = torch.aten.rsqrt %2686 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2687, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2688 = torch.aten.mul.Tensor %2682, %2687 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2688, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_2658 = torch.constant.int 15 | |
%2689 = torch.prims.convert_element_type %2688, %int15_2658 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2689, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2690 = torch.aten.mul.Tensor %120, %2689 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2690, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2691 = torch.aten.div.Tensor %2690, %121 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2691, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2659 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2660 = torch.constant.float 2.400000e+02 | |
%2692 = torch.aten.clamp %2691, %float-2.400000e02_2659, %float2.400000e02_2660 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2692, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2661 = torch.constant.int 26 | |
%2693 = torch.prims.convert_element_type %2692, %int26_2661 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2693, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
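// Q, K, V projections for the next block (f8 matmuls: 4096 -> 4096 for Q, 4096 -> 1024 for K and V), dequantized to bf16 and reshaped to 32 query heads / 8 KV heads of size 128.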
%int-2_2662 = torch.constant.int -2 | |
%int-1_2663 = torch.constant.int -1 | |
%2694 = torch.aten.transpose.int %122, %int-2_2662, %int-1_2663 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_2664 = torch.constant.int 4096 | |
%2695 = torch.prim.ListConstruct %564, %int4096_2664 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2696 = torch.aten.view %2693, %2695 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2696, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2697 = torch.aten.mm %2696, %2694 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2697, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_2665 = torch.constant.int 1 | |
%int4096_2666 = torch.constant.int 4096 | |
%2698 = torch.prim.ListConstruct %int1_2665, %564, %int4096_2666 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2699 = torch.aten.view %2697, %2698 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2699, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_2667 = torch.constant.int 15 | |
%2700 = torch.prims.convert_element_type %2699, %int15_2667 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2700, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2701 = torch.aten.div.Tensor %2690, %123 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2701, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2668 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2669 = torch.constant.float 2.400000e+02 | |
%2702 = torch.aten.clamp %2701, %float-2.400000e02_2668, %float2.400000e02_2669 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2702, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2670 = torch.constant.int 26 | |
%2703 = torch.prims.convert_element_type %2702, %int26_2670 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2703, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_2671 = torch.constant.int -2 | |
%int-1_2672 = torch.constant.int -1 | |
%2704 = torch.aten.transpose.int %124, %int-2_2671, %int-1_2672 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_2673 = torch.constant.int 4096 | |
%2705 = torch.prim.ListConstruct %564, %int4096_2673 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2706 = torch.aten.view %2703, %2705 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2706, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2707 = torch.aten.mm %2706, %2704 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2707, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_2674 = torch.constant.int 1 | |
%int1024_2675 = torch.constant.int 1024 | |
%2708 = torch.prim.ListConstruct %int1_2674, %564, %int1024_2675 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2709 = torch.aten.view %2707, %2708 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2709, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_2676 = torch.constant.int 15 | |
%2710 = torch.prims.convert_element_type %2709, %int15_2676 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %2710, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
%2711 = torch.aten.div.Tensor %2690, %125 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2711, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2677 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2678 = torch.constant.float 2.400000e+02 | |
%2712 = torch.aten.clamp %2711, %float-2.400000e02_2677, %float2.400000e02_2678 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2712, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2679 = torch.constant.int 26 | |
%2713 = torch.prims.convert_element_type %2712, %int26_2679 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2713, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_2680 = torch.constant.int -2 | |
%int-1_2681 = torch.constant.int -1 | |
%2714 = torch.aten.transpose.int %126, %int-2_2680, %int-1_2681 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_2682 = torch.constant.int 4096 | |
%2715 = torch.prim.ListConstruct %564, %int4096_2682 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2716 = torch.aten.view %2713, %2715 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2716, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2717 = torch.aten.mm %2716, %2714 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2717, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_2683 = torch.constant.int 1 | |
%int1024_2684 = torch.constant.int 1024 | |
%2718 = torch.prim.ListConstruct %int1_2683, %564, %int1024_2684 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2719 = torch.aten.view %2717, %2718 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2719, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_2685 = torch.constant.int 15 | |
%2720 = torch.prims.convert_element_type %2719, %int15_2685 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %2720, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
%int1_2686 = torch.constant.int 1 | |
%int32_2687 = torch.constant.int 32 | |
%int128_2688 = torch.constant.int 128 | |
%2721 = torch.prim.ListConstruct %int1_2686, %564, %int32_2687, %int128_2688 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2722 = torch.aten.view %2700, %2721 : !torch.vtensor<[1,?,4096],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2722, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_2689 = torch.constant.int 1 | |
%int8_2690 = torch.constant.int 8 | |
%int128_2691 = torch.constant.int 128 | |
%2723 = torch.prim.ListConstruct %int1_2689, %564, %int8_2690, %int128_2691 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2724 = torch.aten.view %2710, %2723 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2724, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int1_2692 = torch.constant.int 1 | |
%int8_2693 = torch.constant.int 8 | |
%int128_2694 = torch.constant.int 128 | |
%2725 = torch.prim.ListConstruct %int1_2692, %564, %int8_2693, %int128_2694 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2726 = torch.aten.view %2720, %2725 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2726, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
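// Build RoPE tables: inverse frequencies over 64 even dims with base 5.0e5, wavelength-dependent scaling (frequencies whose wavelength exceeds 8192 are divided by 8, smoothly blended between wavelengths 2048 and 8192), then cos/sin over 131072 positions, cast to bf16.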
%int131072_2695 = torch.constant.int 131072 | |
%none_2696 = torch.constant.none | |
%none_2697 = torch.constant.none | |
%cpu_2698 = torch.constant.device "cpu" | |
%false_2699 = torch.constant.bool false | |
%2727 = torch.aten.arange %int131072_2695, %none_2696, %none_2697, %cpu_2698, %false_2699 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_2700 = torch.constant.int 0 | |
%int128_2701 = torch.constant.int 128 | |
%int2_2702 = torch.constant.int 2 | |
%int4_2703 = torch.constant.int 4 | |
%none_2704 = torch.constant.none | |
%cpu_2705 = torch.constant.device "cpu" | |
%false_2706 = torch.constant.bool false | |
%2728 = torch.aten.arange.start_step %int0_2700, %int128_2701, %int2_2702, %int4_2703, %none_2704, %cpu_2705, %false_2706 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_2707 = torch.constant.int 6 | |
%2729 = torch.prims.convert_element_type %2728, %int6_2707 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_2708 = torch.constant.int 128 | |
%2730 = torch.aten.div.Scalar %2729, %int128_2708 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_2709 = torch.constant.float 5.000000e+05 | |
%2731 = torch.aten.pow.Scalar %float5.000000e05_2709, %2730 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2732 = torch.aten.reciprocal %2731 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_2710 = torch.constant.float 1.000000e+00 | |
%2733 = torch.aten.mul.Scalar %2732, %float1.000000e00_2710 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%2734 = torch.aten.reciprocal %2733 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_2711 = torch.constant.float 6.2831853071795862 | |
%2735 = torch.aten.mul.Scalar %2734, %float6.283190e00_2711 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_2712 = torch.constant.float 8.192000e+03 | |
%2736 = torch.aten.gt.Scalar %2735, %float8.192000e03_2712 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_2713 = torch.constant.int 8 | |
%2737 = torch.aten.div.Scalar %2733, %int8_2713 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2738 = torch.aten.where.self %2736, %2737, %2733 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2739 = torch.aten.reciprocal %2735 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_2714 = torch.constant.int 8192 | |
%2740 = torch.aten.mul.Scalar %2739, %int8192_2714 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_2715 = torch.constant.int 1 | |
%int1_2716 = torch.constant.int 1 | |
%2741 = torch.aten.sub.Scalar %2740, %int1_2715, %int1_2716 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_2717 = torch.constant.int 3 | |
%2742 = torch.aten.div.Scalar %2741, %int3_2717 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_2718 = torch.constant.int 1 | |
%int1_2719 = torch.constant.int 1 | |
%2743 = torch.aten.rsub.Scalar %2742, %int1_2718, %int1_2719 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%2744 = torch.aten.mul.Tensor %2743, %2738 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_2720 = torch.constant.int 8 | |
%2745 = torch.aten.div.Scalar %2744, %int8_2720 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2746 = torch.aten.mul.Tensor %2742, %2738 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_2721 = torch.constant.int 1 | |
%2747 = torch.aten.add.Tensor %2745, %2746, %int1_2721 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_2722 = torch.constant.float 2.048000e+03 | |
%2748 = torch.aten.lt.Scalar %2735, %float2.048000e03_2722 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2749 = torch.aten.bitwise_not %2748 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_2723 = torch.constant.float 8.192000e+03 | |
%2750 = torch.aten.gt.Scalar %2735, %float8.192000e03_2723 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2751 = torch.aten.bitwise_not %2750 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2752 = torch.aten.mul.Tensor %2749, %2751 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2753 = torch.aten.where.self %2752, %2747, %2738 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2754 = torch.prim.ListConstruct %2753, %2753 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_2724 = torch.constant.int -1 | |
%2755 = torch.aten.cat %2754, %int-1_2724 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_2725 = torch.constant.int 6 | |
%2756 = torch.prims.convert_element_type %2727, %int6_2725 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_2726 = torch.constant.int 131072 | |
%int1_2727 = torch.constant.int 1 | |
%2757 = torch.prim.ListConstruct %int131072_2726, %int1_2727 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2758 = torch.aten.view %2756, %2757 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%2759 = torch.aten.mul.Tensor %2758, %2755 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%2760 = torch.aten.cos %2759 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_2728 = torch.constant.int 15 | |
%2761 = torch.prims.convert_element_type %2760, %int15_2728 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%2762 = torch.aten.sin %2759 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_2729 = torch.constant.int 15 | |
%2763 = torch.prims.convert_element_type %2762, %int15_2729 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
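// Slice the cos/sin tables to the current sequence length and broadcast them to [1, 1, seq, 128] for the query rotation.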
%int1_2730 = torch.constant.int 1 | |
%2764 = torch.aten.size.int %2699, %int1_2730 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_2731 = torch.constant.int 0 | |
%2765 = torch.aten.add.int %int0_2731, %2764 : !torch.int, !torch.int -> !torch.int | |
%int0_2732 = torch.constant.int 0 | |
%int0_2733 = torch.constant.int 0 | |
%int1_2734 = torch.constant.int 1 | |
%2766 = torch.aten.slice.Tensor %2761, %int0_2732, %int0_2733, %2765, %int1_2734 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2766, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_2735 = torch.constant.int 1 | |
%int0_2736 = torch.constant.int 0 | |
%int9223372036854775807_2737 = torch.constant.int 9223372036854775807 | |
%int1_2738 = torch.constant.int 1 | |
%2767 = torch.aten.slice.Tensor %2766, %int1_2735, %int0_2736, %int9223372036854775807_2737, %int1_2738 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2767, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_2739 = torch.constant.int 0 | |
%2768 = torch.aten.add.int %int0_2739, %2764 : !torch.int, !torch.int -> !torch.int | |
%int0_2740 = torch.constant.int 0 | |
%int0_2741 = torch.constant.int 0 | |
%int1_2742 = torch.constant.int 1 | |
%2769 = torch.aten.slice.Tensor %2763, %int0_2740, %int0_2741, %2768, %int1_2742 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2769, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_2743 = torch.constant.int 1 | |
%int0_2744 = torch.constant.int 0 | |
%int9223372036854775807_2745 = torch.constant.int 9223372036854775807 | |
%int1_2746 = torch.constant.int 1 | |
%2770 = torch.aten.slice.Tensor %2769, %int1_2743, %int0_2744, %int9223372036854775807_2745, %int1_2746 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2770, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_2747 = torch.constant.int 0 | |
%2771 = torch.aten.unsqueeze %2767, %int0_2747 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2771, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_2748 = torch.constant.int 1 | |
%int0_2749 = torch.constant.int 0 | |
%int9223372036854775807_2750 = torch.constant.int 9223372036854775807 | |
%int1_2751 = torch.constant.int 1 | |
%2772 = torch.aten.slice.Tensor %2771, %int1_2748, %int0_2749, %int9223372036854775807_2750, %int1_2751 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2772, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_2752 = torch.constant.int 2 | |
%2773 = torch.aten.unsqueeze %2772, %int2_2752 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2773, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_2753 = torch.constant.int 3 | |
%int0_2754 = torch.constant.int 0 | |
%int9223372036854775807_2755 = torch.constant.int 9223372036854775807 | |
%int1_2756 = torch.constant.int 1 | |
%2774 = torch.aten.slice.Tensor %2773, %int3_2753, %int0_2754, %int9223372036854775807_2755, %int1_2756 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2774, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_2757 = torch.constant.int 0 | |
%2775 = torch.aten.unsqueeze %2770, %int0_2757 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2775, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_2758 = torch.constant.int 1 | |
%int0_2759 = torch.constant.int 0 | |
%int9223372036854775807_2760 = torch.constant.int 9223372036854775807 | |
%int1_2761 = torch.constant.int 1 | |
%2776 = torch.aten.slice.Tensor %2775, %int1_2758, %int0_2759, %int9223372036854775807_2760, %int1_2761 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2776, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_2762 = torch.constant.int 2 | |
%2777 = torch.aten.unsqueeze %2776, %int2_2762 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2777, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_2763 = torch.constant.int 3 | |
%int0_2764 = torch.constant.int 0 | |
%int9223372036854775807_2765 = torch.constant.int 9223372036854775807 | |
%int1_2766 = torch.constant.int 1 | |
%2778 = torch.aten.slice.Tensor %2777, %int3_2763, %int0_2764, %int9223372036854775807_2765, %int1_2766 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2778, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_2767 = torch.constant.int 1 | |
%int2_2768 = torch.constant.int 2 | |
%2779 = torch.aten.transpose.int %2774, %int1_2767, %int2_2768 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2779, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2769 = torch.constant.int 1 | |
%int1_2770 = torch.constant.int 1 | |
%int1_2771 = torch.constant.int 1 | |
%int1_2772 = torch.constant.int 1 | |
%2780 = torch.prim.ListConstruct %int1_2769, %int1_2770, %int1_2771, %int1_2772 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2781 = torch.aten.repeat %2779, %2780 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2781, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2773 = torch.constant.int 1 | |
%int2_2774 = torch.constant.int 2 | |
%2782 = torch.aten.transpose.int %2778, %int1_2773, %int2_2774 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2782, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
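// Apply RoPE to the queries: q * cos + rotate_half(q) * sin, computed in [1, 32, seq, 128] and transposed back to [1, seq, 32, 128].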
%int1_2775 = torch.constant.int 1 | |
%int2_2776 = torch.constant.int 2 | |
%2783 = torch.aten.transpose.int %2722, %int1_2775, %int2_2776 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2783, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2777 = torch.constant.int 1 | |
%int1_2778 = torch.constant.int 1 | |
%int1_2779 = torch.constant.int 1 | |
%int1_2780 = torch.constant.int 1 | |
%2784 = torch.prim.ListConstruct %int1_2777, %int1_2778, %int1_2779, %int1_2780 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2785 = torch.aten.repeat %2782, %2784 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2785, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%2786 = torch.aten.mul.Tensor %2783, %2781 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2786, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int3_2781 = torch.constant.int 3 | |
%int0_2782 = torch.constant.int 0 | |
%int64_2783 = torch.constant.int 64 | |
%int1_2784 = torch.constant.int 1 | |
%2787 = torch.aten.slice.Tensor %2783, %int3_2781, %int0_2782, %int64_2783, %int1_2784 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %2787, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%int3_2785 = torch.constant.int 3 | |
%int64_2786 = torch.constant.int 64 | |
%int9223372036854775807_2787 = torch.constant.int 9223372036854775807 | |
%int1_2788 = torch.constant.int 1 | |
%2788 = torch.aten.slice.Tensor %2783, %int3_2785, %int64_2786, %int9223372036854775807_2787, %int1_2788 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %2788, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%2789 = torch.aten.neg %2788 : !torch.vtensor<[1,32,?,64],bf16> -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %2789, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%2790 = torch.prim.ListConstruct %2789, %2787 : (!torch.vtensor<[1,32,?,64],bf16>, !torch.vtensor<[1,32,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_2789 = torch.constant.int -1 | |
%2791 = torch.aten.cat %2790, %int-1_2789 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2791, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%2792 = torch.aten.mul.Tensor %2791, %2785 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2792, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2790 = torch.constant.int 1 | |
%2793 = torch.aten.add.Tensor %2786, %2792, %int1_2790 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2793, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2791 = torch.constant.int 1 | |
%int2_2792 = torch.constant.int 2 | |
%2794 = torch.aten.transpose.int %2793, %int1_2791, %int2_2792 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2794, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
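// Recompute the same RoPE cos/sin tables for the key path (the exporter emits the table computation once per use).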
%int131072_2793 = torch.constant.int 131072 | |
%none_2794 = torch.constant.none | |
%none_2795 = torch.constant.none | |
%cpu_2796 = torch.constant.device "cpu" | |
%false_2797 = torch.constant.bool false | |
%2795 = torch.aten.arange %int131072_2793, %none_2794, %none_2795, %cpu_2796, %false_2797 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_2798 = torch.constant.int 0 | |
%int128_2799 = torch.constant.int 128 | |
%int2_2800 = torch.constant.int 2 | |
%int4_2801 = torch.constant.int 4 | |
%none_2802 = torch.constant.none | |
%cpu_2803 = torch.constant.device "cpu" | |
%false_2804 = torch.constant.bool false | |
%2796 = torch.aten.arange.start_step %int0_2798, %int128_2799, %int2_2800, %int4_2801, %none_2802, %cpu_2803, %false_2804 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_2805 = torch.constant.int 6 | |
%2797 = torch.prims.convert_element_type %2796, %int6_2805 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_2806 = torch.constant.int 128 | |
%2798 = torch.aten.div.Scalar %2797, %int128_2806 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_2807 = torch.constant.float 5.000000e+05 | |
%2799 = torch.aten.pow.Scalar %float5.000000e05_2807, %2798 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2800 = torch.aten.reciprocal %2799 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_2808 = torch.constant.float 1.000000e+00 | |
%2801 = torch.aten.mul.Scalar %2800, %float1.000000e00_2808 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%2802 = torch.aten.reciprocal %2801 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_2809 = torch.constant.float 6.2831853071795862 | |
%2803 = torch.aten.mul.Scalar %2802, %float6.283190e00_2809 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_2810 = torch.constant.float 8.192000e+03 | |
%2804 = torch.aten.gt.Scalar %2803, %float8.192000e03_2810 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_2811 = torch.constant.int 8 | |
%2805 = torch.aten.div.Scalar %2801, %int8_2811 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2806 = torch.aten.where.self %2804, %2805, %2801 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2807 = torch.aten.reciprocal %2803 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_2812 = torch.constant.int 8192 | |
%2808 = torch.aten.mul.Scalar %2807, %int8192_2812 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_2813 = torch.constant.int 1 | |
%int1_2814 = torch.constant.int 1 | |
%2809 = torch.aten.sub.Scalar %2808, %int1_2813, %int1_2814 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_2815 = torch.constant.int 3 | |
%2810 = torch.aten.div.Scalar %2809, %int3_2815 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_2816 = torch.constant.int 1 | |
%int1_2817 = torch.constant.int 1 | |
%2811 = torch.aten.rsub.Scalar %2810, %int1_2816, %int1_2817 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%2812 = torch.aten.mul.Tensor %2811, %2806 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_2818 = torch.constant.int 8 | |
%2813 = torch.aten.div.Scalar %2812, %int8_2818 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%2814 = torch.aten.mul.Tensor %2810, %2806 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_2819 = torch.constant.int 1 | |
%2815 = torch.aten.add.Tensor %2813, %2814, %int1_2819 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_2820 = torch.constant.float 2.048000e+03 | |
%2816 = torch.aten.lt.Scalar %2803, %float2.048000e03_2820 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2817 = torch.aten.bitwise_not %2816 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_2821 = torch.constant.float 8.192000e+03 | |
%2818 = torch.aten.gt.Scalar %2803, %float8.192000e03_2821 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%2819 = torch.aten.bitwise_not %2818 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2820 = torch.aten.mul.Tensor %2817, %2819 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%2821 = torch.aten.where.self %2820, %2815, %2806 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%2822 = torch.prim.ListConstruct %2821, %2821 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_2822 = torch.constant.int -1 | |
%2823 = torch.aten.cat %2822, %int-1_2822 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_2823 = torch.constant.int 6 | |
%2824 = torch.prims.convert_element_type %2795, %int6_2823 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_2824 = torch.constant.int 131072 | |
%int1_2825 = torch.constant.int 1 | |
%2825 = torch.prim.ListConstruct %int131072_2824, %int1_2825 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2826 = torch.aten.view %2824, %2825 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%2827 = torch.aten.mul.Tensor %2826, %2823 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%2828 = torch.aten.cos %2827 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_2826 = torch.constant.int 15 | |
%2829 = torch.prims.convert_element_type %2828, %int15_2826 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%2830 = torch.aten.sin %2827 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_2827 = torch.constant.int 15 | |
%2831 = torch.prims.convert_element_type %2830, %int15_2827 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
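// Slice cos/sin to the key sequence length and apply RoPE to the 8 KV heads: k * cos + rotate_half(k) * sin.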
%int1_2828 = torch.constant.int 1 | |
%2832 = torch.aten.size.int %2709, %int1_2828 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_2829 = torch.constant.int 0 | |
%2833 = torch.aten.add.int %int0_2829, %2832 : !torch.int, !torch.int -> !torch.int | |
%int0_2830 = torch.constant.int 0 | |
%int0_2831 = torch.constant.int 0 | |
%int1_2832 = torch.constant.int 1 | |
%2834 = torch.aten.slice.Tensor %2829, %int0_2830, %int0_2831, %2833, %int1_2832 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2834, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_2833 = torch.constant.int 1 | |
%int0_2834 = torch.constant.int 0 | |
%int9223372036854775807_2835 = torch.constant.int 9223372036854775807 | |
%int1_2836 = torch.constant.int 1 | |
%2835 = torch.aten.slice.Tensor %2834, %int1_2833, %int0_2834, %int9223372036854775807_2835, %int1_2836 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2835, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_2837 = torch.constant.int 0 | |
%2836 = torch.aten.add.int %int0_2837, %2832 : !torch.int, !torch.int -> !torch.int | |
%int0_2838 = torch.constant.int 0 | |
%int0_2839 = torch.constant.int 0 | |
%int1_2840 = torch.constant.int 1 | |
%2837 = torch.aten.slice.Tensor %2831, %int0_2838, %int0_2839, %2836, %int1_2840 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2837, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_2841 = torch.constant.int 1 | |
%int0_2842 = torch.constant.int 0 | |
%int9223372036854775807_2843 = torch.constant.int 9223372036854775807 | |
%int1_2844 = torch.constant.int 1 | |
%2838 = torch.aten.slice.Tensor %2837, %int1_2841, %int0_2842, %int9223372036854775807_2843, %int1_2844 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %2838, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_2845 = torch.constant.int 0 | |
%2839 = torch.aten.unsqueeze %2835, %int0_2845 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2839, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_2846 = torch.constant.int 1 | |
%int0_2847 = torch.constant.int 0 | |
%int9223372036854775807_2848 = torch.constant.int 9223372036854775807 | |
%int1_2849 = torch.constant.int 1 | |
%2840 = torch.aten.slice.Tensor %2839, %int1_2846, %int0_2847, %int9223372036854775807_2848, %int1_2849 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2840, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_2850 = torch.constant.int 2 | |
%2841 = torch.aten.unsqueeze %2840, %int2_2850 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2841, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_2851 = torch.constant.int 3 | |
%int0_2852 = torch.constant.int 0 | |
%int9223372036854775807_2853 = torch.constant.int 9223372036854775807 | |
%int1_2854 = torch.constant.int 1 | |
%2842 = torch.aten.slice.Tensor %2841, %int3_2851, %int0_2852, %int9223372036854775807_2853, %int1_2854 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2842, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_2855 = torch.constant.int 0 | |
%2843 = torch.aten.unsqueeze %2838, %int0_2855 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2843, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_2856 = torch.constant.int 1 | |
%int0_2857 = torch.constant.int 0 | |
%int9223372036854775807_2858 = torch.constant.int 9223372036854775807 | |
%int1_2859 = torch.constant.int 1 | |
%2844 = torch.aten.slice.Tensor %2843, %int1_2856, %int0_2857, %int9223372036854775807_2858, %int1_2859 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %2844, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_2860 = torch.constant.int 2 | |
%2845 = torch.aten.unsqueeze %2844, %int2_2860 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2845, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_2861 = torch.constant.int 3 | |
%int0_2862 = torch.constant.int 0 | |
%int9223372036854775807_2863 = torch.constant.int 9223372036854775807 | |
%int1_2864 = torch.constant.int 1 | |
%2846 = torch.aten.slice.Tensor %2845, %int3_2861, %int0_2862, %int9223372036854775807_2863, %int1_2864 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %2846, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int1_2865 = torch.constant.int 1 | |
%int2_2866 = torch.constant.int 2 | |
%2847 = torch.aten.transpose.int %2842, %int1_2865, %int2_2866 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2847, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2867 = torch.constant.int 1 | |
%int1_2868 = torch.constant.int 1 | |
%int1_2869 = torch.constant.int 1 | |
%int1_2870 = torch.constant.int 1 | |
%2848 = torch.prim.ListConstruct %int1_2867, %int1_2868, %int1_2869, %int1_2870 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2849 = torch.aten.repeat %2847, %2848 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2849, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2871 = torch.constant.int 1 | |
%int2_2872 = torch.constant.int 2 | |
%2850 = torch.aten.transpose.int %2846, %int1_2871, %int2_2872 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2850, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_2873 = torch.constant.int 1 | |
%int2_2874 = torch.constant.int 2 | |
%2851 = torch.aten.transpose.int %2724, %int1_2873, %int2_2874 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2851, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_2875 = torch.constant.int 1 | |
%int1_2876 = torch.constant.int 1 | |
%int1_2877 = torch.constant.int 1 | |
%int1_2878 = torch.constant.int 1 | |
%2852 = torch.prim.ListConstruct %int1_2875, %int1_2876, %int1_2877, %int1_2878 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2853 = torch.aten.repeat %2850, %2852 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %2853, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%2854 = torch.aten.mul.Tensor %2851, %2849 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2854, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int3_2879 = torch.constant.int 3 | |
%int0_2880 = torch.constant.int 0 | |
%int64_2881 = torch.constant.int 64 | |
%int1_2882 = torch.constant.int 1 | |
%2855 = torch.aten.slice.Tensor %2851, %int3_2879, %int0_2880, %int64_2881, %int1_2882 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %2855, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%int3_2883 = torch.constant.int 3 | |
%int64_2884 = torch.constant.int 64 | |
%int9223372036854775807_2885 = torch.constant.int 9223372036854775807 | |
%int1_2886 = torch.constant.int 1 | |
%2856 = torch.aten.slice.Tensor %2851, %int3_2883, %int64_2884, %int9223372036854775807_2885, %int1_2886 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %2856, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%2857 = torch.aten.neg %2856 : !torch.vtensor<[1,8,?,64],bf16> -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %2857, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%2858 = torch.prim.ListConstruct %2857, %2855 : (!torch.vtensor<[1,8,?,64],bf16>, !torch.vtensor<[1,8,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_2887 = torch.constant.int -1 | |
%2859 = torch.aten.cat %2858, %int-1_2887 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2859, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%2860 = torch.aten.mul.Tensor %2859, %2853 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2860, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_2888 = torch.constant.int 1 | |
%2861 = torch.aten.add.Tensor %2854, %2860, %int1_2888 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,8,?,128],bf16>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %2861, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_2889 = torch.constant.int 1 | |
%int2_2890 = torch.constant.int 2 | |
%2862 = torch.aten.transpose.int %2861, %int1_2889, %int2_2890 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2862, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
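// Quantize the rotated K and the V to f8E4M3FNUZ for the KV cache: divide by the cache scale, clamp to [-240, 240], convert element type.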
%2863 = torch.aten.div.Tensor %2862, %127 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2863, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_2891 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2892 = torch.constant.float 2.400000e+02 | |
%2864 = torch.aten.clamp %2863, %float-2.400000e02_2891, %float2.400000e02_2892 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2864, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_2893 = torch.constant.int 26 | |
%2865 = torch.prims.convert_element_type %2864, %int26_2893 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2865, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
%2866 = torch.aten.div.Tensor %2726, %127 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2866, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_2894 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2895 = torch.constant.float 2.400000e+02 | |
%2867 = torch.aten.clamp %2866, %float-2.400000e02_2894, %float2.400000e02_2895 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %2867, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_2896 = torch.constant.int 26 | |
%2868 = torch.prims.convert_element_type %2867, %int26_2896 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2868, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
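// Scatter K and V into the paged KV cache via index_put: slot indices are %arg2 * 64 + 14 (K) and + 15 (V) into the cache flattened from [pages, 32, 2, 32, 8, 128] to [pages*64, 32, 8, 128]; offsets 14/15 appear to select this block's K/V slots within each page.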
%int64_2897 = torch.constant.int 64 | |
%2869 = torch.aten.mul.Scalar %arg2, %int64_2897 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %2869, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int14 = torch.constant.int 14 | |
%int1_2898 = torch.constant.int 1 | |
%2870 = torch.aten.add.Scalar %2869, %int14, %int1_2898 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %2870, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int1_2899 = torch.constant.int 1 | |
%int32_2900 = torch.constant.int 32 | |
%int8_2901 = torch.constant.int 8 | |
%int128_2902 = torch.constant.int 128 | |
%2871 = torch.prim.ListConstruct %int1_2899, %748, %int32_2900, %int8_2901, %int128_2902 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2872 = torch.aten.view %2865, %2871 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2872, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_2903 = torch.constant.int 32 | |
%int8_2904 = torch.constant.int 8 | |
%int128_2905 = torch.constant.int 128 | |
%2873 = torch.prim.ListConstruct %748, %int32_2903, %int8_2904, %int128_2905 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2874 = torch.aten.view %2872, %2873 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2874, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%2875 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%2876 = torch.aten.view %2870, %2875 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %2876, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%int32_2906 = torch.constant.int 32 | |
%int2_2907 = torch.constant.int 2 | |
%int32_2908 = torch.constant.int 32 | |
%int8_2909 = torch.constant.int 8 | |
%int128_2910 = torch.constant.int 128 | |
%2877 = torch.prim.ListConstruct %739, %int32_2906, %int2_2907, %int32_2908, %int8_2909, %int128_2910 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2878 = torch.aten.view %2601, %2877 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2878, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_2911 = torch.constant.int 32 | |
%2879 = torch.aten.mul.int %739, %int32_2911 : !torch.int, !torch.int -> !torch.int | |
%int2_2912 = torch.constant.int 2 | |
%2880 = torch.aten.mul.int %2879, %int2_2912 : !torch.int, !torch.int -> !torch.int | |
%int32_2913 = torch.constant.int 32 | |
%int8_2914 = torch.constant.int 8 | |
%int128_2915 = torch.constant.int 128 | |
%2881 = torch.prim.ListConstruct %2880, %int32_2913, %int8_2914, %int128_2915 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2882 = torch.aten.view %2878, %2881 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2882, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%2883 = torch.prim.ListConstruct %2876 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_2916 = torch.constant.bool false | |
%2884 = torch.aten.index_put %2882, %2883, %2874, %false_2916 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2884, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_2917 = torch.constant.int 32 | |
%int2_2918 = torch.constant.int 2 | |
%int32_2919 = torch.constant.int 32 | |
%int8_2920 = torch.constant.int 8 | |
%int128_2921 = torch.constant.int 128 | |
%2885 = torch.prim.ListConstruct %739, %int32_2917, %int2_2918, %int32_2919, %int8_2920, %int128_2921 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2886 = torch.aten.view %2884, %2885 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2886, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_2922 = torch.constant.int 2097152 | |
%2887 = torch.prim.ListConstruct %739, %int2097152_2922 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2888 = torch.aten.view %2886, %2887 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2888, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
%int32_2923 = torch.constant.int 32 | |
%int2_2924 = torch.constant.int 2 | |
%int32_2925 = torch.constant.int 32 | |
%int8_2926 = torch.constant.int 8 | |
%int128_2927 = torch.constant.int 128 | |
%2889 = torch.prim.ListConstruct %739, %int32_2923, %int2_2924, %int32_2925, %int8_2926, %int128_2927 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2890 = torch.aten.view %2888, %2889 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2890, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_2928 = torch.constant.int 32 | |
%int8_2929 = torch.constant.int 8 | |
%int128_2930 = torch.constant.int 128 | |
%2891 = torch.prim.ListConstruct %2880, %int32_2928, %int8_2929, %int128_2930 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2892 = torch.aten.view %2890, %2891 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2892, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_2931 = torch.constant.int 1 | |
%int32_2932 = torch.constant.int 32 | |
%int8_2933 = torch.constant.int 8 | |
%int128_2934 = torch.constant.int 128 | |
%2893 = torch.prim.ListConstruct %int1_2931, %748, %int32_2932, %int8_2933, %int128_2934 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2894 = torch.aten.view %2868, %2893 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2894, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_2935 = torch.constant.int 32 | |
%int8_2936 = torch.constant.int 8 | |
%int128_2937 = torch.constant.int 128 | |
%2895 = torch.prim.ListConstruct %748, %int32_2935, %int8_2936, %int128_2937 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2896 = torch.aten.view %2894, %2895 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2896, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int1_2938 = torch.constant.int 1 | |
%int1_2939 = torch.constant.int 1 | |
%2897 = torch.aten.add.Scalar %2870, %int1_2938, %int1_2939 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %2897, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%2898 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%2899 = torch.aten.view %2897, %2898 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %2899, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
%2900 = torch.prim.ListConstruct %2899 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_2940 = torch.constant.bool false | |
%2901 = torch.aten.index_put %2892, %2900, %2896, %false_2940 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2901, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_2941 = torch.constant.int 32 | |
%int2_2942 = torch.constant.int 2 | |
%int32_2943 = torch.constant.int 32 | |
%int8_2944 = torch.constant.int 8 | |
%int128_2945 = torch.constant.int 128 | |
%2902 = torch.prim.ListConstruct %739, %int32_2941, %int2_2942, %int32_2943, %int8_2944, %int128_2945 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2903 = torch.aten.view %2901, %2902 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2903, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_2946 = torch.constant.int 2097152 | |
%2904 = torch.prim.ListConstruct %739, %int2097152_2946 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2905 = torch.aten.view %2903, %2904 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2905, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
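// Grouped-query attention: expand each of the 8 KV heads 4x ([1, seq, 8, 1, 128] -> [1, seq, 32, 128]), then dequantize K and V back to bf16 by multiplying with the cache scale.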
%int-2_2947 = torch.constant.int -2 | |
%2906 = torch.aten.unsqueeze %2865, %int-2_2947 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2906, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_2948 = torch.constant.int 1 | |
%int8_2949 = torch.constant.int 8 | |
%int4_2950 = torch.constant.int 4 | |
%int128_2951 = torch.constant.int 128 | |
%2907 = torch.prim.ListConstruct %int1_2948, %2832, %int8_2949, %int4_2950, %int128_2951 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_2952 = torch.constant.bool false | |
%2908 = torch.aten.expand %2906, %2907, %false_2952 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2908, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_2953 = torch.constant.int 0 | |
%2909 = torch.aten.clone %2908, %int0_2953 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2909, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_2954 = torch.constant.int 1 | |
%int32_2955 = torch.constant.int 32 | |
%int128_2956 = torch.constant.int 128 | |
%2910 = torch.prim.ListConstruct %int1_2954, %2832, %int32_2955, %int128_2956 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2911 = torch.aten._unsafe_view %2909, %2910 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2911, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
%int-2_2957 = torch.constant.int -2 | |
%2912 = torch.aten.unsqueeze %2868, %int-2_2957 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2912, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 1, 128)> : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ> | |
%int1_2958 = torch.constant.int 1 | |
%2913 = torch.aten.size.int %2719, %int1_2958 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int1_2959 = torch.constant.int 1 | |
%int8_2960 = torch.constant.int 8 | |
%int4_2961 = torch.constant.int 4 | |
%int128_2962 = torch.constant.int 128 | |
%2914 = torch.prim.ListConstruct %int1_2959, %2913, %int8_2960, %int4_2961, %int128_2962 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%false_2963 = torch.constant.bool false | |
%2915 = torch.aten.expand %2912, %2914, %false_2963 : !torch.vtensor<[1,?,8,1,128],f8E4M3FNUZ>, !torch.list<int>, !torch.bool -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2915, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int0_2964 = torch.constant.int 0 | |
%2916 = torch.aten.clone %2915, %int0_2964 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2916, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 4, 128)> : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ> | |
%int1_2965 = torch.constant.int 1 | |
%int32_2966 = torch.constant.int 32 | |
%int128_2967 = torch.constant.int 128 | |
%2917 = torch.prim.ListConstruct %int1_2965, %2913, %int32_2966, %int128_2967 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2918 = torch.aten._unsafe_view %2916, %2917 : !torch.vtensor<[1,?,8,4,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2918, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ> | |
%int6_2968 = torch.constant.int 6 | |
%2919 = torch.prims.convert_element_type %2911, %int6_2968 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2919, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%2920 = torch.aten.mul.Tensor %2919, %127 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2920, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_2969 = torch.constant.int 15 | |
%2921 = torch.prims.convert_element_type %2920, %int15_2969 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2921, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int6_2970 = torch.constant.int 6 | |
%2922 = torch.prims.convert_element_type %2918, %int6_2970 : !torch.vtensor<[1,?,32,128],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2922, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%2923 = torch.aten.mul.Tensor %2922, %127 : !torch.vtensor<[1,?,32,128],f32>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,32,128],f32> | |
torch.bind_symbolic_shape %2923, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],f32> | |
%int15_2971 = torch.constant.int 15 | |
%2924 = torch.prims.convert_element_type %2923, %int15_2971 : !torch.vtensor<[1,?,32,128],f32>, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2924, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
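// Transpose Q/K/V to [1, 32, seq, 128] and run causal scaled-dot-product flash attention (dropout 0.0).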
%int1_2972 = torch.constant.int 1 | |
%int2_2973 = torch.constant.int 2 | |
%2925 = torch.aten.transpose.int %2794, %int1_2972, %int2_2973 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2925, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2974 = torch.constant.int 1 | |
%int2_2975 = torch.constant.int 2 | |
%2926 = torch.aten.transpose.int %2921, %int1_2974, %int2_2975 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2926, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_2976 = torch.constant.int 1 | |
%int2_2977 = torch.constant.int 2 | |
%2927 = torch.aten.transpose.int %2924, %int1_2976, %int2_2977 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %2927, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%float0.000000e00_2978 = torch.constant.float 0.000000e+00 | |
%true_2979 = torch.constant.bool true | |
%none_2980 = torch.constant.none | |
%none_2981 = torch.constant.none | |
%2928:2 = torch.operator "torch.aten._scaled_dot_product_flash_attention_for_cpu"(%2925, %2926, %2927, %float0.000000e00_2978, %true_2979, %none_2980, %none_2981) : (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.float, !torch.bool, !torch.none, !torch.none) -> (!torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?],f32>) | |
torch.bind_symbolic_shape %2928#0, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
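// Transpose the attention output back to [1, seq, 32, 128], flatten to [1, seq, 4096], and quantize it to f8E4M3FNUZ for the output projection.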
%int1_2982 = torch.constant.int 1 | |
%int2_2983 = torch.constant.int 2 | |
%2929 = torch.aten.transpose.int %2928#0, %int1_2982, %int2_2983 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %2929, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_2984 = torch.constant.int 1 | |
%int4096_2985 = torch.constant.int 4096 | |
%2930 = torch.prim.ListConstruct %int1_2984, %2764, %int4096_2985 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2931 = torch.aten.view %2929, %2930 : !torch.vtensor<[1,?,32,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2931, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2932 = torch.aten.div.Tensor %2931, %128 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2932, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_2986 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_2987 = torch.constant.float 2.400000e+02 | |
%2933 = torch.aten.clamp %2932, %float-2.400000e02_2986, %float2.400000e02_2987 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2933, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_2988 = torch.constant.int 26 | |
%2934 = torch.prims.convert_element_type %2933, %int26_2988 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2934, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
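// Output projection: f8 matmul with the transposed 4096x4096 projection weight, convert back to bf16, and add the residual from the block input.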
%int-2_2989 = torch.constant.int -2 | |
%int-1_2990 = torch.constant.int -1 | |
%2935 = torch.aten.transpose.int %129, %int-2_2989, %int-1_2990 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_2991 = torch.constant.int 4096 | |
%2936 = torch.prim.ListConstruct %2764, %int4096_2991 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2937 = torch.aten.view %2934, %2936 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2937, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2938 = torch.aten.mm %2937, %2935 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2938, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_2992 = torch.constant.int 1 | |
%int4096_2993 = torch.constant.int 4096 | |
%2939 = torch.prim.ListConstruct %int1_2992, %2764, %int4096_2993 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2940 = torch.aten.view %2938, %2939 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2940, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_2994 = torch.constant.int 15 | |
%2941 = torch.prims.convert_element_type %2940, %int15_2994 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2941, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
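// Residual add: the projected attention output is added back onto the layer input (%2681).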
%int1_2995 = torch.constant.int 1 | |
%2942 = torch.aten.add.Tensor %2681, %2941, %int1_2995 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2942, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
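// FFN norm (RMSNorm): x * rsqrt(mean(x^2, dim=-1) + 1e-5), computed in f32, cast back to bf16, then scaled by the 4096-wide norm weight.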
%int6_2996 = torch.constant.int 6 | |
%2943 = torch.prims.convert_element_type %2942, %int6_2996 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2943, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_2997 = torch.constant.int 2 | |
%2944 = torch.aten.pow.Tensor_Scalar %2943, %int2_2997 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2944, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_2998 = torch.constant.int -1 | |
%2945 = torch.prim.ListConstruct %int-1_2998 : (!torch.int) -> !torch.list<int> | |
%true_2999 = torch.constant.bool true | |
%none_3000 = torch.constant.none | |
%2946 = torch.aten.mean.dim %2944, %2945, %true_2999, %none_3000 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2946, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_3001 = torch.constant.float 1.000000e-05 | |
%int1_3002 = torch.constant.int 1 | |
%2947 = torch.aten.add.Scalar %2946, %float1.000000e-05_3001, %int1_3002 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2947, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2948 = torch.aten.rsqrt %2947 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2948, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2949 = torch.aten.mul.Tensor %2943, %2948 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2949, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_3003 = torch.constant.int 15 | |
%2950 = torch.prims.convert_element_type %2949, %int15_3003 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2950, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2951 = torch.aten.mul.Tensor %130, %2950 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2951, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
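// FFN gate projection: quantize the normed activations to f8 (scale, clamp, cast), matmul 4096 -> 14336, cast to bf16, apply SiLU.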
%2952 = torch.aten.div.Tensor %2951, %131 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2952, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_3004 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_3005 = torch.constant.float 2.400000e+02 | |
%2953 = torch.aten.clamp %2952, %float-2.400000e02_3004, %float2.400000e02_3005 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2953, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_3006 = torch.constant.int 26 | |
%2954 = torch.prims.convert_element_type %2953, %int26_3006 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2954, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_3007 = torch.constant.int -2 | |
%int-1_3008 = torch.constant.int -1 | |
%2955 = torch.aten.transpose.int %132, %int-2_3007, %int-1_3008 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_3009 = torch.constant.int 4096 | |
%2956 = torch.prim.ListConstruct %564, %int4096_3009 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2957 = torch.aten.view %2954, %2956 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2957, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2958 = torch.aten.mm %2957, %2955 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2958, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_3010 = torch.constant.int 1 | |
%int14336_3011 = torch.constant.int 14336 | |
%2959 = torch.prim.ListConstruct %int1_3010, %564, %int14336_3011 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2960 = torch.aten.view %2958, %2959 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2960, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_3012 = torch.constant.int 15 | |
%2961 = torch.prims.convert_element_type %2960, %int15_3012 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2961, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%2962 = torch.aten.silu %2961 : !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2962, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
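// FFN up projection: same quantize + f8 matmul pattern, 4096 -> 14336, no activation.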
%2963 = torch.aten.div.Tensor %2951, %133 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2963, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_3013 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_3014 = torch.constant.float 2.400000e+02 | |
%2964 = torch.aten.clamp %2963, %float-2.400000e02_3013, %float2.400000e02_3014 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2964, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_3015 = torch.constant.int 26 | |
%2965 = torch.prims.convert_element_type %2964, %int26_3015 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2965, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_3016 = torch.constant.int -2 | |
%int-1_3017 = torch.constant.int -1 | |
%2966 = torch.aten.transpose.int %134, %int-2_3016, %int-1_3017 : !torch.vtensor<[14336,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,14336],f8E4M3FNUZ> | |
%int4096_3018 = torch.constant.int 4096 | |
%2967 = torch.prim.ListConstruct %564, %int4096_3018 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2968 = torch.aten.view %2965, %2967 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2968, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%2969 = torch.aten.mm %2968, %2966 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,14336],f8E4M3FNUZ> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2969, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%int1_3019 = torch.constant.int 1 | |
%int14336_3020 = torch.constant.int 14336 | |
%2970 = torch.prim.ListConstruct %int1_3019, %564, %int14336_3020 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2971 = torch.aten.view %2969, %2970 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2971, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int15_3021 = torch.constant.int 15 | |
%2972 = torch.prims.convert_element_type %2971, %int15_3021 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2972, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
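// SwiGLU gating: silu(gate) * up elementwise, then quantize the product and project back down 14336 -> 4096 through the FFN down weight.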
%2973 = torch.aten.mul.Tensor %2962, %2972 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[1,?,14336],bf16> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2973, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%2974 = torch.aten.div.Tensor %2973, %135 : !torch.vtensor<[1,?,14336],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2974, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%float-2.400000e02_3022 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_3023 = torch.constant.float 2.400000e+02 | |
%2975 = torch.aten.clamp %2974, %float-2.400000e02_3022, %float2.400000e02_3023 : !torch.vtensor<[1,?,14336],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,14336],bf16> | |
torch.bind_symbolic_shape %2975, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],bf16> | |
%int26_3024 = torch.constant.int 26 | |
%2976 = torch.prims.convert_element_type %2975, %int26_3024 : !torch.vtensor<[1,?,14336],bf16>, !torch.int -> !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2976, [%548], affine_map<()[s0] -> (1, s0 * 32, 14336)> : !torch.vtensor<[1,?,14336],f8E4M3FNUZ> | |
%int-2_3025 = torch.constant.int -2 | |
%int-1_3026 = torch.constant.int -1 | |
%2977 = torch.aten.transpose.int %136, %int-2_3025, %int-1_3026 : !torch.vtensor<[4096,14336],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[14336,4096],f8E4M3FNUZ> | |
%int1_3027 = torch.constant.int 1 | |
%2978 = torch.aten.size.int %2960, %int1_3027 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int14336_3028 = torch.constant.int 14336 | |
%2979 = torch.prim.ListConstruct %2978, %int14336_3028 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2980 = torch.aten.view %2976, %2979 : !torch.vtensor<[1,?,14336],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2980, [%548], affine_map<()[s0] -> (s0 * 32, 14336)> : !torch.vtensor<[?,14336],f8E4M3FNUZ> | |
%2981 = torch.aten.mm %2980, %2977 : !torch.vtensor<[?,14336],f8E4M3FNUZ>, !torch.vtensor<[14336,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2981, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_3029 = torch.constant.int 1 | |
%int4096_3030 = torch.constant.int 4096 | |
%2982 = torch.prim.ListConstruct %int1_3029, %2978, %int4096_3030 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2983 = torch.aten.view %2981, %2982 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2983, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_3031 = torch.constant.int 15 | |
%2984 = torch.prims.convert_element_type %2983, %int15_3031 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2984, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
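// Second residual add of the layer: FFN output added onto the post-attention hidden state.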
%int1_3032 = torch.constant.int 1 | |
%2985 = torch.aten.add.Tensor %2942, %2984, %int1_3032 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2985, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
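// Attention norm (RMSNorm) for what appears to be the following decoder layer; same f32 mean-of-squares / rsqrt / bf16 pattern as above, scaled by its own weight.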
%int6_3033 = torch.constant.int 6 | |
%2986 = torch.prims.convert_element_type %2985, %int6_3033 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2986, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int2_3034 = torch.constant.int 2 | |
%2987 = torch.aten.pow.Tensor_Scalar %2986, %int2_3034 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2987, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int-1_3035 = torch.constant.int -1 | |
%2988 = torch.prim.ListConstruct %int-1_3035 : (!torch.int) -> !torch.list<int> | |
%true_3036 = torch.constant.bool true | |
%none_3037 = torch.constant.none | |
%2989 = torch.aten.mean.dim %2987, %2988, %true_3036, %none_3037 : !torch.vtensor<[1,?,4096],f32>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2989, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%float1.000000e-05_3038 = torch.constant.float 1.000000e-05 | |
%int1_3039 = torch.constant.int 1 | |
%2990 = torch.aten.add.Scalar %2989, %float1.000000e-05_3038, %int1_3039 : !torch.vtensor<[1,?,1],f32>, !torch.float, !torch.int -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2990, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2991 = torch.aten.rsqrt %2990 : !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,1],f32> | |
torch.bind_symbolic_shape %2991, [%548], affine_map<()[s0] -> (1, s0 * 32, 1)> : !torch.vtensor<[1,?,1],f32> | |
%2992 = torch.aten.mul.Tensor %2986, %2991 : !torch.vtensor<[1,?,4096],f32>, !torch.vtensor<[1,?,1],f32> -> !torch.vtensor<[1,?,4096],f32> | |
torch.bind_symbolic_shape %2992, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f32> | |
%int15_3040 = torch.constant.int 15 | |
%2993 = torch.prims.convert_element_type %2992, %int15_3040 : !torch.vtensor<[1,?,4096],f32>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2993, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%2994 = torch.aten.mul.Tensor %137, %2993 : !torch.vtensor<[4096],bf16>, !torch.vtensor<[1,?,4096],bf16> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2994, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
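// Q projection: quantize to f8 and matmul with the transposed 4096x4096 query weight, result reshaped to [1, seq, 4096] in bf16.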
%2995 = torch.aten.div.Tensor %2994, %138 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2995, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_3041 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_3042 = torch.constant.float 2.400000e+02 | |
%2996 = torch.aten.clamp %2995, %float-2.400000e02_3041, %float2.400000e02_3042 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %2996, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_3043 = torch.constant.int 26 | |
%2997 = torch.prims.convert_element_type %2996, %int26_3043 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %2997, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_3044 = torch.constant.int -2 | |
%int-1_3045 = torch.constant.int -1 | |
%2998 = torch.aten.transpose.int %139, %int-2_3044, %int-1_3045 : !torch.vtensor<[4096,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,4096],f8E4M3FNUZ> | |
%int4096_3046 = torch.constant.int 4096 | |
%2999 = torch.prim.ListConstruct %564, %int4096_3046 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3000 = torch.aten.view %2997, %2999 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3000, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%3001 = torch.aten.mm %3000, %2998 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,4096],f8E4M3FNUZ> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3001, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%int1_3047 = torch.constant.int 1 | |
%int4096_3048 = torch.constant.int 4096 | |
%3002 = torch.prim.ListConstruct %int1_3047, %564, %int4096_3048 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3003 = torch.aten.view %3001, %3002 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3003, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int15_3049 = torch.constant.int 15 | |
%3004 = torch.prims.convert_element_type %3003, %int15_3049 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %3004, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
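// K projection: 4096 -> 1024 (8 KV heads x 128), same f8 quantize + matmul pattern.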
%3005 = torch.aten.div.Tensor %2994, %140 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %3005, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_3050 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_3051 = torch.constant.float 2.400000e+02 | |
%3006 = torch.aten.clamp %3005, %float-2.400000e02_3050, %float2.400000e02_3051 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %3006, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_3052 = torch.constant.int 26 | |
%3007 = torch.prims.convert_element_type %3006, %int26_3052 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3007, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_3053 = torch.constant.int -2 | |
%int-1_3054 = torch.constant.int -1 | |
%3008 = torch.aten.transpose.int %141, %int-2_3053, %int-1_3054 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_3055 = torch.constant.int 4096 | |
%3009 = torch.prim.ListConstruct %564, %int4096_3055 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3010 = torch.aten.view %3007, %3009 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3010, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%3011 = torch.aten.mm %3010, %3008 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3011, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_3056 = torch.constant.int 1 | |
%int1024_3057 = torch.constant.int 1024 | |
%3012 = torch.prim.ListConstruct %int1_3056, %564, %int1024_3057 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3013 = torch.aten.view %3011, %3012 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3013, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_3058 = torch.constant.int 15 | |
%3014 = torch.prims.convert_element_type %3013, %int15_3058 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %3014, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
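// V projection: 4096 -> 1024, same f8 quantize + matmul pattern.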
%3015 = torch.aten.div.Tensor %2994, %142 : !torch.vtensor<[1,?,4096],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %3015, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%float-2.400000e02_3059 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_3060 = torch.constant.float 2.400000e+02 | |
%3016 = torch.aten.clamp %3015, %float-2.400000e02_3059, %float2.400000e02_3060 : !torch.vtensor<[1,?,4096],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,4096],bf16> | |
torch.bind_symbolic_shape %3016, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],bf16> | |
%int26_3061 = torch.constant.int 26 | |
%3017 = torch.prims.convert_element_type %3016, %int26_3061 : !torch.vtensor<[1,?,4096],bf16>, !torch.int -> !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3017, [%548], affine_map<()[s0] -> (1, s0 * 32, 4096)> : !torch.vtensor<[1,?,4096],f8E4M3FNUZ> | |
%int-2_3062 = torch.constant.int -2 | |
%int-1_3063 = torch.constant.int -1 | |
%3018 = torch.aten.transpose.int %143, %int-2_3062, %int-1_3063 : !torch.vtensor<[1024,4096],f8E4M3FNUZ>, !torch.int, !torch.int -> !torch.vtensor<[4096,1024],f8E4M3FNUZ> | |
%int4096_3064 = torch.constant.int 4096 | |
%3019 = torch.prim.ListConstruct %564, %int4096_3064 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3020 = torch.aten.view %3017, %3019 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3020, [%548], affine_map<()[s0] -> (s0 * 32, 4096)> : !torch.vtensor<[?,4096],f8E4M3FNUZ> | |
%3021 = torch.aten.mm %3020, %3018 : !torch.vtensor<[?,4096],f8E4M3FNUZ>, !torch.vtensor<[4096,1024],f8E4M3FNUZ> -> !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3021, [%548], affine_map<()[s0] -> (s0 * 32, 1024)> : !torch.vtensor<[?,1024],f8E4M3FNUZ> | |
%int1_3065 = torch.constant.int 1 | |
%int1024_3066 = torch.constant.int 1024 | |
%3022 = torch.prim.ListConstruct %int1_3065, %564, %int1024_3066 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3023 = torch.aten.view %3021, %3022 : !torch.vtensor<[?,1024],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3023, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],f8E4M3FNUZ> | |
%int15_3067 = torch.constant.int 15 | |
%3024 = torch.prims.convert_element_type %3023, %int15_3067 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.vtensor<[1,?,1024],bf16> | |
torch.bind_symbolic_shape %3024, [%548], affine_map<()[s0] -> (1, s0 * 32, 1024)> : !torch.vtensor<[1,?,1024],bf16> | |
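// Split heads: Q -> [1, seq, 32, 128], K and V -> [1, seq, 8, 128] (grouped-query layout, 4 query heads per KV head).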
%int1_3068 = torch.constant.int 1 | |
%int32_3069 = torch.constant.int 32 | |
%int128_3070 = torch.constant.int 128 | |
%3025 = torch.prim.ListConstruct %int1_3068, %564, %int32_3069, %int128_3070 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3026 = torch.aten.view %3004, %3025 : !torch.vtensor<[1,?,4096],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %3026, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
%int1_3071 = torch.constant.int 1 | |
%int8_3072 = torch.constant.int 8 | |
%int128_3073 = torch.constant.int 128 | |
%3027 = torch.prim.ListConstruct %int1_3071, %564, %int8_3072, %int128_3073 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3028 = torch.aten.view %3014, %3027 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %3028, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int1_3074 = torch.constant.int 1 | |
%int8_3075 = torch.constant.int 8 | |
%int128_3076 = torch.constant.int 128 | |
%3029 = torch.prim.ListConstruct %int1_3074, %564, %int8_3075, %int128_3076 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3030 = torch.aten.view %3024, %3029 : !torch.vtensor<[1,?,1024],bf16>, !torch.list<int> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %3030, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
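// Rotary embedding table, recomputed inline over 131072 positions. The op sequence is consistent with Llama-3-style frequency scaling:
//   inv_freq = 1 / 500000^(arange(0, 128, 2) / 128); wavelen = 2*pi / inv_freq
//   wavelen > 8192: use inv_freq / 8; 2048 <= wavelen <= 8192: blend inv_freq/8 and inv_freq with smooth = (8192 / wavelen - 1) / 3
//   angles = positions[:, None] * cat(freqs, freqs); cos and sin of the angles are cast to bf16.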
%int131072_3077 = torch.constant.int 131072 | |
%none_3078 = torch.constant.none | |
%none_3079 = torch.constant.none | |
%cpu_3080 = torch.constant.device "cpu" | |
%false_3081 = torch.constant.bool false | |
%3031 = torch.aten.arange %int131072_3077, %none_3078, %none_3079, %cpu_3080, %false_3081 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_3082 = torch.constant.int 0 | |
%int128_3083 = torch.constant.int 128 | |
%int2_3084 = torch.constant.int 2 | |
%int4_3085 = torch.constant.int 4 | |
%none_3086 = torch.constant.none | |
%cpu_3087 = torch.constant.device "cpu" | |
%false_3088 = torch.constant.bool false | |
%3032 = torch.aten.arange.start_step %int0_3082, %int128_3083, %int2_3084, %int4_3085, %none_3086, %cpu_3087, %false_3088 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_3089 = torch.constant.int 6 | |
%3033 = torch.prims.convert_element_type %3032, %int6_3089 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_3090 = torch.constant.int 128 | |
%3034 = torch.aten.div.Scalar %3033, %int128_3090 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_3091 = torch.constant.float 5.000000e+05 | |
%3035 = torch.aten.pow.Scalar %float5.000000e05_3091, %3034 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%3036 = torch.aten.reciprocal %3035 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_3092 = torch.constant.float 1.000000e+00 | |
%3037 = torch.aten.mul.Scalar %3036, %float1.000000e00_3092 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%3038 = torch.aten.reciprocal %3037 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_3093 = torch.constant.float 6.2831853071795862 | |
%3039 = torch.aten.mul.Scalar %3038, %float6.283190e00_3093 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_3094 = torch.constant.float 8.192000e+03 | |
%3040 = torch.aten.gt.Scalar %3039, %float8.192000e03_3094 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_3095 = torch.constant.int 8 | |
%3041 = torch.aten.div.Scalar %3037, %int8_3095 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%3042 = torch.aten.where.self %3040, %3041, %3037 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%3043 = torch.aten.reciprocal %3039 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_3096 = torch.constant.int 8192 | |
%3044 = torch.aten.mul.Scalar %3043, %int8192_3096 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_3097 = torch.constant.int 1 | |
%int1_3098 = torch.constant.int 1 | |
%3045 = torch.aten.sub.Scalar %3044, %int1_3097, %int1_3098 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_3099 = torch.constant.int 3 | |
%3046 = torch.aten.div.Scalar %3045, %int3_3099 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_3100 = torch.constant.int 1 | |
%int1_3101 = torch.constant.int 1 | |
%3047 = torch.aten.rsub.Scalar %3046, %int1_3100, %int1_3101 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%3048 = torch.aten.mul.Tensor %3047, %3042 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_3102 = torch.constant.int 8 | |
%3049 = torch.aten.div.Scalar %3048, %int8_3102 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%3050 = torch.aten.mul.Tensor %3046, %3042 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_3103 = torch.constant.int 1 | |
%3051 = torch.aten.add.Tensor %3049, %3050, %int1_3103 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_3104 = torch.constant.float 2.048000e+03 | |
%3052 = torch.aten.lt.Scalar %3039, %float2.048000e03_3104 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%3053 = torch.aten.bitwise_not %3052 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_3105 = torch.constant.float 8.192000e+03 | |
%3054 = torch.aten.gt.Scalar %3039, %float8.192000e03_3105 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%3055 = torch.aten.bitwise_not %3054 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%3056 = torch.aten.mul.Tensor %3053, %3055 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%3057 = torch.aten.where.self %3056, %3051, %3042 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%3058 = torch.prim.ListConstruct %3057, %3057 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_3106 = torch.constant.int -1 | |
%3059 = torch.aten.cat %3058, %int-1_3106 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_3107 = torch.constant.int 6 | |
%3060 = torch.prims.convert_element_type %3031, %int6_3107 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_3108 = torch.constant.int 131072 | |
%int1_3109 = torch.constant.int 1 | |
%3061 = torch.prim.ListConstruct %int131072_3108, %int1_3109 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3062 = torch.aten.view %3060, %3061 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%3063 = torch.aten.mul.Tensor %3062, %3059 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%3064 = torch.aten.cos %3063 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_3110 = torch.constant.int 15 | |
%3065 = torch.prims.convert_element_type %3064, %int15_3110 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%3066 = torch.aten.sin %3063 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_3111 = torch.constant.int 15 | |
%3067 = torch.prims.convert_element_type %3066, %int15_3111 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
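// Slice the cos/sin tables to the current sequence length (dim 1 of the Q projection output) and reshape them to [1, seq, 1, 128].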
%int1_3112 = torch.constant.int 1 | |
%3068 = torch.aten.size.int %3003, %int1_3112 : !torch.vtensor<[1,?,4096],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_3113 = torch.constant.int 0 | |
%3069 = torch.aten.add.int %int0_3113, %3068 : !torch.int, !torch.int -> !torch.int | |
%int0_3114 = torch.constant.int 0 | |
%int0_3115 = torch.constant.int 0 | |
%int1_3116 = torch.constant.int 1 | |
%3070 = torch.aten.slice.Tensor %3065, %int0_3114, %int0_3115, %3069, %int1_3116 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %3070, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_3117 = torch.constant.int 1 | |
%int0_3118 = torch.constant.int 0 | |
%int9223372036854775807_3119 = torch.constant.int 9223372036854775807 | |
%int1_3120 = torch.constant.int 1 | |
%3071 = torch.aten.slice.Tensor %3070, %int1_3117, %int0_3118, %int9223372036854775807_3119, %int1_3120 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %3071, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_3121 = torch.constant.int 0 | |
%3072 = torch.aten.add.int %int0_3121, %3068 : !torch.int, !torch.int -> !torch.int | |
%int0_3122 = torch.constant.int 0 | |
%int0_3123 = torch.constant.int 0 | |
%int1_3124 = torch.constant.int 1 | |
%3073 = torch.aten.slice.Tensor %3067, %int0_3122, %int0_3123, %3072, %int1_3124 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %3073, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_3125 = torch.constant.int 1 | |
%int0_3126 = torch.constant.int 0 | |
%int9223372036854775807_3127 = torch.constant.int 9223372036854775807 | |
%int1_3128 = torch.constant.int 1 | |
%3074 = torch.aten.slice.Tensor %3073, %int1_3125, %int0_3126, %int9223372036854775807_3127, %int1_3128 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %3074, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_3129 = torch.constant.int 0 | |
%3075 = torch.aten.unsqueeze %3071, %int0_3129 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %3075, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_3130 = torch.constant.int 1 | |
%int0_3131 = torch.constant.int 0 | |
%int9223372036854775807_3132 = torch.constant.int 9223372036854775807 | |
%int1_3133 = torch.constant.int 1 | |
%3076 = torch.aten.slice.Tensor %3075, %int1_3130, %int0_3131, %int9223372036854775807_3132, %int1_3133 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %3076, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_3134 = torch.constant.int 2 | |
%3077 = torch.aten.unsqueeze %3076, %int2_3134 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %3077, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_3135 = torch.constant.int 3 | |
%int0_3136 = torch.constant.int 0 | |
%int9223372036854775807_3137 = torch.constant.int 9223372036854775807 | |
%int1_3138 = torch.constant.int 1 | |
%3078 = torch.aten.slice.Tensor %3077, %int3_3135, %int0_3136, %int9223372036854775807_3137, %int1_3138 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %3078, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_3139 = torch.constant.int 0 | |
%3079 = torch.aten.unsqueeze %3074, %int0_3139 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %3079, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_3140 = torch.constant.int 1 | |
%int0_3141 = torch.constant.int 0 | |
%int9223372036854775807_3142 = torch.constant.int 9223372036854775807 | |
%int1_3143 = torch.constant.int 1 | |
%3080 = torch.aten.slice.Tensor %3079, %int1_3140, %int0_3141, %int9223372036854775807_3142, %int1_3143 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %3080, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_3144 = torch.constant.int 2 | |
%3081 = torch.aten.unsqueeze %3080, %int2_3144 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %3081, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_3145 = torch.constant.int 3 | |
%int0_3146 = torch.constant.int 0 | |
%int9223372036854775807_3147 = torch.constant.int 9223372036854775807 | |
%int1_3148 = torch.constant.int 1 | |
%3082 = torch.aten.slice.Tensor %3081, %int3_3145, %int0_3146, %int9223372036854775807_3147, %int1_3148 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %3082, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
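// Apply rotary embedding to Q: q * cos + rotate_half(q) * sin, where rotate_half is cat(-q[..., 64:], q[..., :64]); then transpose back to [1, seq, 32, 128].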
%int1_3149 = torch.constant.int 1 | |
%int2_3150 = torch.constant.int 2 | |
%3083 = torch.aten.transpose.int %3078, %int1_3149, %int2_3150 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %3083, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_3151 = torch.constant.int 1 | |
%int1_3152 = torch.constant.int 1 | |
%int1_3153 = torch.constant.int 1 | |
%int1_3154 = torch.constant.int 1 | |
%3084 = torch.prim.ListConstruct %int1_3151, %int1_3152, %int1_3153, %int1_3154 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3085 = torch.aten.repeat %3083, %3084 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %3085, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_3155 = torch.constant.int 1 | |
%int2_3156 = torch.constant.int 2 | |
%3086 = torch.aten.transpose.int %3082, %int1_3155, %int2_3156 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %3086, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_3157 = torch.constant.int 1 | |
%int2_3158 = torch.constant.int 2 | |
%3087 = torch.aten.transpose.int %3026, %int1_3157, %int2_3158 : !torch.vtensor<[1,?,32,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %3087, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_3159 = torch.constant.int 1 | |
%int1_3160 = torch.constant.int 1 | |
%int1_3161 = torch.constant.int 1 | |
%int1_3162 = torch.constant.int 1 | |
%3088 = torch.prim.ListConstruct %int1_3159, %int1_3160, %int1_3161, %int1_3162 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3089 = torch.aten.repeat %3086, %3088 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %3089, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%3090 = torch.aten.mul.Tensor %3087, %3085 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %3090, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int3_3163 = torch.constant.int 3 | |
%int0_3164 = torch.constant.int 0 | |
%int64_3165 = torch.constant.int 64 | |
%int1_3166 = torch.constant.int 1 | |
%3091 = torch.aten.slice.Tensor %3087, %int3_3163, %int0_3164, %int64_3165, %int1_3166 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %3091, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%int3_3167 = torch.constant.int 3 | |
%int64_3168 = torch.constant.int 64 | |
%int9223372036854775807_3169 = torch.constant.int 9223372036854775807 | |
%int1_3170 = torch.constant.int 1 | |
%3092 = torch.aten.slice.Tensor %3087, %int3_3167, %int64_3168, %int9223372036854775807_3169, %int1_3170 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %3092, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%3093 = torch.aten.neg %3092 : !torch.vtensor<[1,32,?,64],bf16> -> !torch.vtensor<[1,32,?,64],bf16> | |
torch.bind_symbolic_shape %3093, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 64)> : !torch.vtensor<[1,32,?,64],bf16> | |
%3094 = torch.prim.ListConstruct %3093, %3091 : (!torch.vtensor<[1,32,?,64],bf16>, !torch.vtensor<[1,32,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_3171 = torch.constant.int -1 | |
%3095 = torch.aten.cat %3094, %int-1_3171 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %3095, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%3096 = torch.aten.mul.Tensor %3095, %3089 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %3096, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_3172 = torch.constant.int 1 | |
%3097 = torch.aten.add.Tensor %3090, %3096, %int1_3172 : !torch.vtensor<[1,32,?,128],bf16>, !torch.vtensor<[1,32,?,128],bf16>, !torch.int -> !torch.vtensor<[1,32,?,128],bf16> | |
torch.bind_symbolic_shape %3097, [%548], affine_map<()[s0] -> (1, 32, s0 * 32, 128)> : !torch.vtensor<[1,32,?,128],bf16> | |
%int1_3173 = torch.constant.int 1 | |
%int2_3174 = torch.constant.int 2 | |
%3098 = torch.aten.transpose.int %3097, %int1_3173, %int2_3174 : !torch.vtensor<[1,32,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,32,128],bf16> | |
torch.bind_symbolic_shape %3098, [%548], affine_map<()[s0] -> (1, s0 * 32, 32, 128)> : !torch.vtensor<[1,?,32,128],bf16> | |
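// The same rotary table (arange, frequency scaling, cos/sin) is recomputed verbatim for the K path.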
%int131072_3175 = torch.constant.int 131072 | |
%none_3176 = torch.constant.none | |
%none_3177 = torch.constant.none | |
%cpu_3178 = torch.constant.device "cpu" | |
%false_3179 = torch.constant.bool false | |
%3099 = torch.aten.arange %int131072_3175, %none_3176, %none_3177, %cpu_3178, %false_3179 : !torch.int, !torch.none, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[131072],si64> | |
%int0_3180 = torch.constant.int 0 | |
%int128_3181 = torch.constant.int 128 | |
%int2_3182 = torch.constant.int 2 | |
%int4_3183 = torch.constant.int 4 | |
%none_3184 = torch.constant.none | |
%cpu_3185 = torch.constant.device "cpu" | |
%false_3186 = torch.constant.bool false | |
%3100 = torch.aten.arange.start_step %int0_3180, %int128_3181, %int2_3182, %int4_3183, %none_3184, %cpu_3185, %false_3186 : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[64],si64> | |
%int6_3187 = torch.constant.int 6 | |
%3101 = torch.prims.convert_element_type %3100, %int6_3187 : !torch.vtensor<[64],si64>, !torch.int -> !torch.vtensor<[64],f32> | |
%int128_3188 = torch.constant.int 128 | |
%3102 = torch.aten.div.Scalar %3101, %int128_3188 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float5.000000e05_3189 = torch.constant.float 5.000000e+05 | |
%3103 = torch.aten.pow.Scalar %float5.000000e05_3189, %3102 : !torch.float, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%3104 = torch.aten.reciprocal %3103 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float1.000000e00_3190 = torch.constant.float 1.000000e+00 | |
%3105 = torch.aten.mul.Scalar %3104, %float1.000000e00_3190 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%3106 = torch.aten.reciprocal %3105 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%float6.283190e00_3191 = torch.constant.float 6.2831853071795862 | |
%3107 = torch.aten.mul.Scalar %3106, %float6.283190e00_3191 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],f32> | |
%float8.192000e03_3192 = torch.constant.float 8.192000e+03 | |
%3108 = torch.aten.gt.Scalar %3107, %float8.192000e03_3192 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%int8_3193 = torch.constant.int 8 | |
%3109 = torch.aten.div.Scalar %3105, %int8_3193 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%3110 = torch.aten.where.self %3108, %3109, %3105 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%3111 = torch.aten.reciprocal %3107 : !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8192_3194 = torch.constant.int 8192 | |
%3112 = torch.aten.mul.Scalar %3111, %int8192_3194 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_3195 = torch.constant.int 1 | |
%int1_3196 = torch.constant.int 1 | |
%3113 = torch.aten.sub.Scalar %3112, %int1_3195, %int1_3196 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%int3_3197 = torch.constant.int 3 | |
%3114 = torch.aten.div.Scalar %3113, %int3_3197 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%int1_3198 = torch.constant.int 1 | |
%int1_3199 = torch.constant.int 1 | |
%3115 = torch.aten.rsub.Scalar %3114, %int1_3198, %int1_3199 : !torch.vtensor<[64],f32>, !torch.int, !torch.int -> !torch.vtensor<[64],f32> | |
%3116 = torch.aten.mul.Tensor %3115, %3110 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int8_3200 = torch.constant.int 8 | |
%3117 = torch.aten.div.Scalar %3116, %int8_3200 : !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%3118 = torch.aten.mul.Tensor %3114, %3110 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%int1_3201 = torch.constant.int 1 | |
%3119 = torch.aten.add.Tensor %3117, %3118, %int1_3201 : !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>, !torch.int -> !torch.vtensor<[64],f32> | |
%float2.048000e03_3202 = torch.constant.float 2.048000e+03 | |
%3120 = torch.aten.lt.Scalar %3107, %float2.048000e03_3202 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%3121 = torch.aten.bitwise_not %3120 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%float8.192000e03_3203 = torch.constant.float 8.192000e+03 | |
%3122 = torch.aten.gt.Scalar %3107, %float8.192000e03_3203 : !torch.vtensor<[64],f32>, !torch.float -> !torch.vtensor<[64],i1> | |
%3123 = torch.aten.bitwise_not %3122 : !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%3124 = torch.aten.mul.Tensor %3121, %3123 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],i1> -> !torch.vtensor<[64],i1> | |
%3125 = torch.aten.where.self %3124, %3119, %3110 : !torch.vtensor<[64],i1>, !torch.vtensor<[64],f32>, !torch.vtensor<[64],f32> -> !torch.vtensor<[64],f32> | |
%3126 = torch.prim.ListConstruct %3125, %3125 : (!torch.vtensor<[64],f32>, !torch.vtensor<[64],f32>) -> !torch.list<vtensor> | |
%int-1_3204 = torch.constant.int -1 | |
%3127 = torch.aten.cat %3126, %int-1_3204 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[128],f32> | |
%int6_3205 = torch.constant.int 6 | |
%3128 = torch.prims.convert_element_type %3099, %int6_3205 : !torch.vtensor<[131072],si64>, !torch.int -> !torch.vtensor<[131072],f32> | |
%int131072_3206 = torch.constant.int 131072 | |
%int1_3207 = torch.constant.int 1 | |
%3129 = torch.prim.ListConstruct %int131072_3206, %int1_3207 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3130 = torch.aten.view %3128, %3129 : !torch.vtensor<[131072],f32>, !torch.list<int> -> !torch.vtensor<[131072,1],f32> | |
%3131 = torch.aten.mul.Tensor %3130, %3127 : !torch.vtensor<[131072,1],f32>, !torch.vtensor<[128],f32> -> !torch.vtensor<[131072,128],f32> | |
%3132 = torch.aten.cos %3131 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_3208 = torch.constant.int 15 | |
%3133 = torch.prims.convert_element_type %3132, %int15_3208 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
%3134 = torch.aten.sin %3131 : !torch.vtensor<[131072,128],f32> -> !torch.vtensor<[131072,128],f32> | |
%int15_3209 = torch.constant.int 15 | |
%3135 = torch.prims.convert_element_type %3134, %int15_3209 : !torch.vtensor<[131072,128],f32>, !torch.int -> !torch.vtensor<[131072,128],bf16> | |
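// Slice cos/sin to the K sequence length (dim 1 of the K projection output) and reshape to [1, seq, 1, 128].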
%int1_3210 = torch.constant.int 1 | |
%3136 = torch.aten.size.int %3013, %int1_3210 : !torch.vtensor<[1,?,1024],f8E4M3FNUZ>, !torch.int -> !torch.int | |
%int0_3211 = torch.constant.int 0 | |
%3137 = torch.aten.add.int %int0_3211, %3136 : !torch.int, !torch.int -> !torch.int | |
%int0_3212 = torch.constant.int 0 | |
%int0_3213 = torch.constant.int 0 | |
%int1_3214 = torch.constant.int 1 | |
%3138 = torch.aten.slice.Tensor %3133, %int0_3212, %int0_3213, %3137, %int1_3214 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %3138, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_3215 = torch.constant.int 1 | |
%int0_3216 = torch.constant.int 0 | |
%int9223372036854775807_3217 = torch.constant.int 9223372036854775807 | |
%int1_3218 = torch.constant.int 1 | |
%3139 = torch.aten.slice.Tensor %3138, %int1_3215, %int0_3216, %int9223372036854775807_3217, %int1_3218 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %3139, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_3219 = torch.constant.int 0 | |
%3140 = torch.aten.add.int %int0_3219, %3136 : !torch.int, !torch.int -> !torch.int | |
%int0_3220 = torch.constant.int 0 | |
%int0_3221 = torch.constant.int 0 | |
%int1_3222 = torch.constant.int 1 | |
%3141 = torch.aten.slice.Tensor %3135, %int0_3220, %int0_3221, %3140, %int1_3222 : !torch.vtensor<[131072,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %3141, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int1_3223 = torch.constant.int 1 | |
%int0_3224 = torch.constant.int 0 | |
%int9223372036854775807_3225 = torch.constant.int 9223372036854775807 | |
%int1_3226 = torch.constant.int 1 | |
%3142 = torch.aten.slice.Tensor %3141, %int1_3223, %int0_3224, %int9223372036854775807_3225, %int1_3226 : !torch.vtensor<[?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[?,128],bf16> | |
torch.bind_symbolic_shape %3142, [%548], affine_map<()[s0] -> (s0 * 32, 128)> : !torch.vtensor<[?,128],bf16> | |
%int0_3227 = torch.constant.int 0 | |
%3143 = torch.aten.unsqueeze %3139, %int0_3227 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %3143, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_3228 = torch.constant.int 1 | |
%int0_3229 = torch.constant.int 0 | |
%int9223372036854775807_3230 = torch.constant.int 9223372036854775807 | |
%int1_3231 = torch.constant.int 1 | |
%3144 = torch.aten.slice.Tensor %3143, %int1_3228, %int0_3229, %int9223372036854775807_3230, %int1_3231 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %3144, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_3232 = torch.constant.int 2 | |
%3145 = torch.aten.unsqueeze %3144, %int2_3232 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %3145, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_3233 = torch.constant.int 3 | |
%int0_3234 = torch.constant.int 0 | |
%int9223372036854775807_3235 = torch.constant.int 9223372036854775807 | |
%int1_3236 = torch.constant.int 1 | |
%3146 = torch.aten.slice.Tensor %3145, %int3_3233, %int0_3234, %int9223372036854775807_3235, %int1_3236 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %3146, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int0_3237 = torch.constant.int 0 | |
%3147 = torch.aten.unsqueeze %3142, %int0_3237 : !torch.vtensor<[?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %3147, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int1_3238 = torch.constant.int 1 | |
%int0_3239 = torch.constant.int 0 | |
%int9223372036854775807_3240 = torch.constant.int 9223372036854775807 | |
%int1_3241 = torch.constant.int 1 | |
%3148 = torch.aten.slice.Tensor %3147, %int1_3238, %int0_3239, %int9223372036854775807_3240, %int1_3241 : !torch.vtensor<[1,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,128],bf16> | |
torch.bind_symbolic_shape %3148, [%548], affine_map<()[s0] -> (1, s0 * 32, 128)> : !torch.vtensor<[1,?,128],bf16> | |
%int2_3242 = torch.constant.int 2 | |
%3149 = torch.aten.unsqueeze %3148, %int2_3242 : !torch.vtensor<[1,?,128],bf16>, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %3149, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
%int3_3243 = torch.constant.int 3 | |
%int0_3244 = torch.constant.int 0 | |
%int9223372036854775807_3245 = torch.constant.int 9223372036854775807 | |
%int1_3246 = torch.constant.int 1 | |
%3150 = torch.aten.slice.Tensor %3149, %int3_3243, %int0_3244, %int9223372036854775807_3245, %int1_3246 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,?,1,128],bf16> | |
torch.bind_symbolic_shape %3150, [%548], affine_map<()[s0] -> (1, s0 * 32, 1, 128)> : !torch.vtensor<[1,?,1,128],bf16> | |
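// Apply rotary embedding to K (8 KV heads): k * cos + rotate_half(k) * sin, then transpose back to [1, seq, 8, 128].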
%int1_3247 = torch.constant.int 1 | |
%int2_3248 = torch.constant.int 2 | |
%3151 = torch.aten.transpose.int %3146, %int1_3247, %int2_3248 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %3151, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_3249 = torch.constant.int 1 | |
%int1_3250 = torch.constant.int 1 | |
%int1_3251 = torch.constant.int 1 | |
%int1_3252 = torch.constant.int 1 | |
%3152 = torch.prim.ListConstruct %int1_3249, %int1_3250, %int1_3251, %int1_3252 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3153 = torch.aten.repeat %3151, %3152 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %3153, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_3253 = torch.constant.int 1 | |
%int2_3254 = torch.constant.int 2 | |
%3154 = torch.aten.transpose.int %3150, %int1_3253, %int2_3254 : !torch.vtensor<[1,?,1,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %3154, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%int1_3255 = torch.constant.int 1 | |
%int2_3256 = torch.constant.int 2 | |
%3155 = torch.aten.transpose.int %3028, %int1_3255, %int2_3256 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %3155, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_3257 = torch.constant.int 1 | |
%int1_3258 = torch.constant.int 1 | |
%int1_3259 = torch.constant.int 1 | |
%int1_3260 = torch.constant.int 1 | |
%3156 = torch.prim.ListConstruct %int1_3257, %int1_3258, %int1_3259, %int1_3260 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3157 = torch.aten.repeat %3154, %3156 : !torch.vtensor<[1,1,?,128],bf16>, !torch.list<int> -> !torch.vtensor<[1,1,?,128],bf16> | |
torch.bind_symbolic_shape %3157, [%548], affine_map<()[s0] -> (1, 1, s0 * 32, 128)> : !torch.vtensor<[1,1,?,128],bf16> | |
%3158 = torch.aten.mul.Tensor %3155, %3153 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %3158, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int3_3261 = torch.constant.int 3 | |
%int0_3262 = torch.constant.int 0 | |
%int64_3263 = torch.constant.int 64 | |
%int1_3264 = torch.constant.int 1 | |
%3159 = torch.aten.slice.Tensor %3155, %int3_3261, %int0_3262, %int64_3263, %int1_3264 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %3159, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%int3_3265 = torch.constant.int 3 | |
%int64_3266 = torch.constant.int 64 | |
%int9223372036854775807_3267 = torch.constant.int 9223372036854775807 | |
%int1_3268 = torch.constant.int 1 | |
%3160 = torch.aten.slice.Tensor %3155, %int3_3265, %int64_3266, %int9223372036854775807_3267, %int1_3268 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %3160, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%3161 = torch.aten.neg %3160 : !torch.vtensor<[1,8,?,64],bf16> -> !torch.vtensor<[1,8,?,64],bf16> | |
torch.bind_symbolic_shape %3161, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 64)> : !torch.vtensor<[1,8,?,64],bf16> | |
%3162 = torch.prim.ListConstruct %3161, %3159 : (!torch.vtensor<[1,8,?,64],bf16>, !torch.vtensor<[1,8,?,64],bf16>) -> !torch.list<vtensor> | |
%int-1_3269 = torch.constant.int -1 | |
%3163 = torch.aten.cat %3162, %int-1_3269 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %3163, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%3164 = torch.aten.mul.Tensor %3163, %3157 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,1,?,128],bf16> -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %3164, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
%int1_3270 = torch.constant.int 1 | |
%3165 = torch.aten.add.Tensor %3158, %3164, %int1_3270 : !torch.vtensor<[1,8,?,128],bf16>, !torch.vtensor<[1,8,?,128],bf16>, !torch.int -> !torch.vtensor<[1,8,?,128],bf16> | |
torch.bind_symbolic_shape %3165, [%548], affine_map<()[s0] -> (1, 8, s0 * 32, 128)> : !torch.vtensor<[1,8,?,128],bf16> | |
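// Transpose the rotated keys back to [1, seq, 8, 128].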
%int1_3271 = torch.constant.int 1 | |
%int2_3272 = torch.constant.int 2 | |
%3166 = torch.aten.transpose.int %3165, %int1_3271, %int2_3272 : !torch.vtensor<[1,8,?,128],bf16>, !torch.int, !torch.int -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %3166, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
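// Quantize the keys for the fp8 KV cache: divide by the scalar scale %144, clamp to [-240, 240]
// (the finite range of f8E4M3FNUZ), then cast to f8E4M3FNUZ (torch dtype code 26).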
%3167 = torch.aten.div.Tensor %3166, %144 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %3167, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_3273 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_3274 = torch.constant.float 2.400000e+02 | |
%3168 = torch.aten.clamp %3167, %float-2.400000e02_3273, %float2.400000e02_3274 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %3168, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_3275 = torch.constant.int 26 | |
%3169 = torch.prims.convert_element_type %3168, %int26_3275 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3169, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
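// Quantize the value states %3030 with the same scale, clamp, and cast.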
%3170 = torch.aten.div.Tensor %3030, %144 : !torch.vtensor<[1,?,8,128],bf16>, !torch.vtensor<[],f32> -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %3170, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%float-2.400000e02_3276 = torch.constant.float -2.400000e+02 | |
%float2.400000e02_3277 = torch.constant.float 2.400000e+02 | |
%3171 = torch.aten.clamp %3170, %float-2.400000e02_3276, %float2.400000e02_3277 : !torch.vtensor<[1,?,8,128],bf16>, !torch.float, !torch.float -> !torch.vtensor<[1,?,8,128],bf16> | |
torch.bind_symbolic_shape %3171, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],bf16> | |
%int26_3278 = torch.constant.int 26 | |
%3172 = torch.prims.convert_element_type %3171, %int26_3278 : !torch.vtensor<[1,?,8,128],bf16>, !torch.int -> !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3172, [%548], affine_map<()[s0] -> (1, s0 * 32, 8, 128)> : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ> | |
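// Compute destination slot indices in the paged KV cache: page id (%arg2) * 64 sub-blocks per page,
// plus an offset of 16 -- presumably 2 * block_index, selecting this block's key sub-block.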
%int64_3279 = torch.constant.int 64 | |
%3173 = torch.aten.mul.Scalar %arg2, %int64_3279 : !torch.vtensor<[1,?],si64>, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %3173, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
%int16 = torch.constant.int 16 | |
%int1_3280 = torch.constant.int 1 | |
%3174 = torch.aten.add.Scalar %3173, %int16, %int1_3280 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %3174, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64> | |
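// Reshape the quantized keys to page granularity ([1, pages, 32, 8, 128] -> [pages, 32, 8, 128])
// and flatten the slot indices to a 1-D tensor.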
%int1_3281 = torch.constant.int 1 | |
%int32_3282 = torch.constant.int 32 | |
%int8_3283 = torch.constant.int 8 | |
%int128_3284 = torch.constant.int 128 | |
%3175 = torch.prim.ListConstruct %int1_3281, %748, %int32_3282, %int8_3283, %int128_3284 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3176 = torch.aten.view %3169, %3175 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3176, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_3285 = torch.constant.int 32 | |
%int8_3286 = torch.constant.int 8 | |
%int128_3287 = torch.constant.int 128 | |
%3177 = torch.prim.ListConstruct %748, %int32_3285, %int8_3286, %int128_3287 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3178 = torch.aten.view %3176, %3177 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3178, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%3179 = torch.prim.ListConstruct %748 : (!torch.int) -> !torch.list<int> | |
%3180 = torch.aten.view %3174, %3179 : !torch.vtensor<[1,?],si64>, !torch.list<int> -> !torch.vtensor<[?],si64> | |
torch.bind_symbolic_shape %3180, [%548], affine_map<()[s0] -> (s0)> : !torch.vtensor<[?],si64> | |
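// View the flat cache buffer %2905 ([pages, 2097152] f8E4M3FNUZ) as
// [pages, 32 layers, 2 (K/V), 32 positions, 8 heads, 128], then collapse the leading dims into a
// single slot dimension of size pages * 64.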
%int32_3288 = torch.constant.int 32 | |
%int2_3289 = torch.constant.int 2 | |
%int32_3290 = torch.constant.int 32 | |
%int8_3291 = torch.constant.int 8 | |
%int128_3292 = torch.constant.int 128 | |
%3181 = torch.prim.ListConstruct %739, %int32_3288, %int2_3289, %int32_3290, %int8_3291, %int128_3292 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3182 = torch.aten.view %2905, %3181 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3182, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_3293 = torch.constant.int 32 | |
%3183 = torch.aten.mul.int %739, %int32_3293 : !torch.int, !torch.int -> !torch.int | |
%int2_3294 = torch.constant.int 2 | |
%3184 = torch.aten.mul.int %3183, %int2_3294 : !torch.int, !torch.int -> !torch.int | |
%int32_3295 = torch.constant.int 32 | |
%int8_3296 = torch.constant.int 8 | |
%int128_3297 = torch.constant.int 128 | |
%3185 = torch.prim.ListConstruct %3184, %int32_3295, %int8_3296, %int128_3297 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3186 = torch.aten.view %3182, %3185 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3186, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
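// Scatter the new key blocks into their cache slots (index_put with accumulate = false),
// then restore the flat [pages, 2097152] view of the cache.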
%3187 = torch.prim.ListConstruct %3180 : (!torch.vtensor<[?],si64>) -> !torch.list<optional<vtensor>> | |
%false_3298 = torch.constant.bool false | |
%3188 = torch.aten.index_put %3186, %3187, %3178, %false_3298 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<optional<vtensor>>, !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.bool -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3188, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
%int32_3299 = torch.constant.int 32 | |
%int2_3300 = torch.constant.int 2 | |
%int32_3301 = torch.constant.int 32 | |
%int8_3302 = torch.constant.int 8 | |
%int128_3303 = torch.constant.int 128 | |
%3189 = torch.prim.ListConstruct %739, %int32_3299, %int2_3300, %int32_3301, %int8_3302, %int128_3303 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3190 = torch.aten.view %3188, %3189 : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3190, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int2097152_3304 = torch.constant.int 2097152 | |
%3191 = torch.prim.ListConstruct %739, %int2097152_3304 : (!torch.int, !torch.int) -> !torch.list<int> | |
%3192 = torch.aten.view %3190, %3191 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3192, [%549], affine_map<()[s0] -> (s0, 2097152)> : !torch.vtensor<[?,2097152],f8E4M3FNUZ> | |
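// Re-expand the updated cache to slot granularity for the value write.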
%int32_3305 = torch.constant.int 32 | |
%int2_3306 = torch.constant.int 2 | |
%int32_3307 = torch.constant.int 32 | |
%int8_3308 = torch.constant.int 8 | |
%int128_3309 = torch.constant.int 128 | |
%3193 = torch.prim.ListConstruct %739, %int32_3305, %int2_3306, %int32_3307, %int8_3308, %int128_3309 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3194 = torch.aten.view %3192, %3193 : !torch.vtensor<[?,2097152],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3194, [%549], affine_map<()[s0] -> (s0, 32, 2, 32, 8, 128)> : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ> | |
%int32_3310 = torch.constant.int 32 | |
%int8_3311 = torch.constant.int 8 | |
%int128_3312 = torch.constant.int 128 | |
%3195 = torch.prim.ListConstruct %3184, %int32_3310, %int8_3311, %int128_3312 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3196 = torch.aten.view %3194, %3195 : !torch.vtensor<[?,32,2,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3196, [%549], affine_map<()[s0] -> (s0 * 64, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
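// Reshape the quantized values to page granularity, mirroring the key path above.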
%int1_3313 = torch.constant.int 1 | |
%int32_3314 = torch.constant.int 32 | |
%int8_3315 = torch.constant.int 8 | |
%int128_3316 = torch.constant.int 128 | |
%3197 = torch.prim.ListConstruct %int1_3313, %748, %int32_3314, %int8_3315, %int128_3316 : (!torch.int, !torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3198 = torch.aten.view %3172, %3197 : !torch.vtensor<[1,?,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3198, [%548], affine_map<()[s0] -> (1, s0, 32, 8, 128)> : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ> | |
%int32_3317 = torch.constant.int 32 | |
%int8_3318 = torch.constant.int 8 | |
%int128_3319 = torch.constant.int 128 | |
%3199 = torch.prim.ListConstruct %748, %int32_3317, %int8_3318, %int128_3319 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%3200 = torch.aten.view %3198, %3199 : !torch.vtensor<[1,?,32,8,128],f8E4M3FNUZ>, !torch.list<int> -> !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
torch.bind_symbolic_shape %3200, [%548], affine_map<()[s0] -> (s0, 32, 8, 128)> : !torch.vtensor<[?,32,8,128],f8E4M3FNUZ> | |
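// Value slot indices are the key slot indices plus 1: presumably the V sub-block, which
// immediately follows the K sub-block within each page.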
%int1_3320 = torch.constant.int 1 | |
%int1_3321 = torch.constant.int 1 | |
%3201 = torch.aten.add.Scalar %3174, %int1_3320, %int1_3321 : !torch.vtensor<[1,?],si64>, !torch.int, !torch.int -> !torch.vtensor<[1,?],si64> | |
torch.bind_symbolic_shape %3201, [%548], affine_map<()[s0] -> (1, s0)> : !torch.vtensor<[1,?],si64>