Created
January 13, 2023 08:08
-
-
Save pashu123/e4e2ed9be12cbd5b7698d7e024ea53a7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
graph(): | |
%arg0_1 : [#users=1] = placeholder[target=arg0_1] | |
%arg1_1 : [#users=1] = placeholder[target=arg1_1] | |
%arg2_1 : [#users=32] = placeholder[target=arg2_1] | |
%arg3_1 : [#users=1] = placeholder[target=arg3_1] | |
%cat : [#users=1] = call_function[target=torch.ops.aten.cat](args = ([%arg0_1, %arg0_1],), kwargs = {}) | |
%expand : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%arg1_1, [2]), kwargs = {}) | |
%arange : [#users=1] = call_function[target=torch.ops.aten.arange](args = (0, 160), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%mul : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%arange, -9.210340371976184), kwargs = {}) | |
%div : [#users=1] = call_function[target=torch.ops.aten.div](args = (%mul, 160), kwargs = {}) | |
%exp : [#users=1] = call_function[target=torch.ops.aten.exp](args = (%div,), kwargs = {}) | |
%slice_1 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%expand, 0, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_1, 1), kwargs = {}) | |
%unsqueeze_1 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%exp, 0), kwargs = {}) | |
%slice_2 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%unsqueeze_1, 1, 0, 9223372036854775807), kwargs = {}) | |
%mul_1 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%unsqueeze, %slice_2), kwargs = {}) | |
%mul_2 : [#users=2] = call_function[target=torch.ops.aten.mul](args = (%mul_1, 1), kwargs = {}) | |
%sin : [#users=1] = call_function[target=torch.ops.aten.sin](args = (%mul_2,), kwargs = {}) | |
%cos : [#users=1] = call_function[target=torch.ops.aten.cos](args = (%mul_2,), kwargs = {}) | |
%cat_1 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%sin, %cos], -1), kwargs = {}) | |
%slice_3 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%cat_1, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_4 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_3, 1, 160, 9223372036854775807), kwargs = {}) | |
%slice_5 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%cat_1, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_6 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_5, 1, 0, 160), kwargs = {}) | |
%cat_2 : [#users=1] = call_function[target=torch.ops.aten.cat](args = ([%slice_4, %slice_6], -1), kwargs = {}) | |
%_param_constant0 : [#users=1] = get_attr[target=_param_constant0] | |
%t : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant0,), kwargs = {}) | |
%_param_constant1 : [#users=1] = get_attr[target=_param_constant1] | |
%addmm : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant1, %cat_2, %t), kwargs = {}) | |
%silu : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm,), kwargs = {}) | |
%_param_constant2 : [#users=1] = get_attr[target=_param_constant2] | |
%t_1 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant2,), kwargs = {}) | |
%_param_constant3 : [#users=1] = get_attr[target=_param_constant3] | |
%addmm_1 : [#users=22] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant3, %silu, %t_1), kwargs = {}) | |
%_param_constant4 : [#users=1] = get_attr[target=_param_constant4] | |
%_param_constant5 : [#users=1] = get_attr[target=_param_constant5] | |
%convolution : [#users=3] = call_function[target=torch.ops.aten.convolution](args = (%cat, %_param_constant4, %_param_constant5, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%view : [#users=2] = call_function[target=torch.ops.aten.view](args = (%convolution, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem : [#users=1] = call_function[target=operator.getitem](args = (%var_mean, 0), kwargs = {}) | |
%getitem_1 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean, 1), kwargs = {}) | |
%add : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem, 1e-05), kwargs = {}) | |
%rsqrt : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add,), kwargs = {}) | |
%sub : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view, %getitem_1), kwargs = {}) | |
%mul_3 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub, %rsqrt), kwargs = {}) | |
%view_1 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_3, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant6 : [#users=1] = get_attr[target=_param_constant6] | |
%unsqueeze_2 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant6, 0), kwargs = {}) | |
%unsqueeze_3 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_2, 2), kwargs = {}) | |
%unsqueeze_4 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_3, 3), kwargs = {}) | |
%_param_constant7 : [#users=1] = get_attr[target=_param_constant7] | |
%unsqueeze_5 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant7, 0), kwargs = {}) | |
%unsqueeze_6 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_5, 2), kwargs = {}) | |
%unsqueeze_7 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_6, 3), kwargs = {}) | |
%mul_4 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_1, %unsqueeze_7), kwargs = {}) | |
%add_1 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_4, %unsqueeze_4), kwargs = {}) | |
%squeeze : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_1, 3), kwargs = {}) | |
%squeeze_1 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze, 2), kwargs = {}) | |
%squeeze_2 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt, 3), kwargs = {}) | |
%squeeze_3 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_2, 2), kwargs = {}) | |
%detach : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_1,), kwargs = {}) | |
%detach_1 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_3,), kwargs = {}) | |
%silu_1 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_1,), kwargs = {}) | |
%_param_constant8 : [#users=1] = get_attr[target=_param_constant8] | |
%_param_constant9 : [#users=1] = get_attr[target=_param_constant9] | |
%convolution_1 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_1, %_param_constant8, %_param_constant9, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_2 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant10 : [#users=1] = get_attr[target=_param_constant10] | |
%t_2 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant10,), kwargs = {}) | |
%_param_constant11 : [#users=1] = get_attr[target=_param_constant11] | |
%addmm_2 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant11, %silu_2, %t_2), kwargs = {}) | |
%slice_7 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_2, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_8 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_7, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_8 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_8, 2), kwargs = {}) | |
%unsqueeze_9 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_8, 3), kwargs = {}) | |
%add_2 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_1, %unsqueeze_9), kwargs = {}) | |
%view_2 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_2, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_1 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_2, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_2 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_1, 0), kwargs = {}) | |
%getitem_3 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_1, 1), kwargs = {}) | |
%add_3 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_2, 1e-05), kwargs = {}) | |
%rsqrt_1 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_3,), kwargs = {}) | |
%sub_1 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_2, %getitem_3), kwargs = {}) | |
%mul_5 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_1, %rsqrt_1), kwargs = {}) | |
%view_3 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_5, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant12 : [#users=1] = get_attr[target=_param_constant12] | |
%unsqueeze_10 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant12, 0), kwargs = {}) | |
%unsqueeze_11 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_10, 2), kwargs = {}) | |
%unsqueeze_12 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_11, 3), kwargs = {}) | |
%_param_constant13 : [#users=1] = get_attr[target=_param_constant13] | |
%unsqueeze_13 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant13, 0), kwargs = {}) | |
%unsqueeze_14 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_13, 2), kwargs = {}) | |
%unsqueeze_15 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_14, 3), kwargs = {}) | |
%mul_6 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_3, %unsqueeze_15), kwargs = {}) | |
%add_4 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_6, %unsqueeze_12), kwargs = {}) | |
%squeeze_4 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_3, 3), kwargs = {}) | |
%squeeze_5 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_4, 2), kwargs = {}) | |
%squeeze_6 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_1, 3), kwargs = {}) | |
%squeeze_7 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_6, 2), kwargs = {}) | |
%detach_2 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_5,), kwargs = {}) | |
%detach_3 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_7,), kwargs = {}) | |
%silu_3 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_4,), kwargs = {}) | |
%_param_constant14 : [#users=1] = get_attr[target=_param_constant14] | |
%_param_constant15 : [#users=1] = get_attr[target=_param_constant15] | |
%convolution_2 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_3, %_param_constant14, %_param_constant15, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_5 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution, %convolution_2), kwargs = {}) | |
%div_1 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_5, 1.0), kwargs = {}) | |
%view_4 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_1, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_2 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_4, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_4 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_2, 0), kwargs = {}) | |
%getitem_5 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_2, 1), kwargs = {}) | |
%add_6 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_4, 1e-06), kwargs = {}) | |
%rsqrt_2 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_6,), kwargs = {}) | |
%sub_2 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_4, %getitem_5), kwargs = {}) | |
%mul_7 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_2, %rsqrt_2), kwargs = {}) | |
%view_5 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_7, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant16 : [#users=1] = get_attr[target=_param_constant16] | |
%unsqueeze_16 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant16, 0), kwargs = {}) | |
%unsqueeze_17 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_16, 2), kwargs = {}) | |
%unsqueeze_18 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_17, 3), kwargs = {}) | |
%_param_constant17 : [#users=1] = get_attr[target=_param_constant17] | |
%unsqueeze_19 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant17, 0), kwargs = {}) | |
%unsqueeze_20 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_19, 2), kwargs = {}) | |
%unsqueeze_21 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_20, 3), kwargs = {}) | |
%mul_8 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_5, %unsqueeze_21), kwargs = {}) | |
%add_7 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_8, %unsqueeze_18), kwargs = {}) | |
%squeeze_8 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_5, 3), kwargs = {}) | |
%squeeze_9 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_8, 2), kwargs = {}) | |
%squeeze_10 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_2, 3), kwargs = {}) | |
%squeeze_11 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_10, 2), kwargs = {}) | |
%detach_4 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_9,), kwargs = {}) | |
%detach_5 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_11,), kwargs = {}) | |
%permute : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_7, [0, 2, 3, 1]), kwargs = {}) | |
%view_6 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute, [2, 4096, 320]), kwargs = {}) | |
%_param_constant18 : [#users=1] = get_attr[target=_param_constant18] | |
%t_3 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant18,), kwargs = {}) | |
%expand_1 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_6, [2, 4096, 320]), kwargs = {}) | |
%view_7 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_1, [2, 4096, 320]), kwargs = {}) | |
%expand_2 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_3, [2, 320, 320]), kwargs = {}) | |
%view_8 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_2, [2, 320, 320]), kwargs = {}) | |
%bmm : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_7, %view_8), kwargs = {}) | |
%_unsafe_view : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm, [2, 4096, 320]), kwargs = {}) | |
%_param_constant19 : [#users=1] = get_attr[target=_param_constant19] | |
%add_8 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view, %_param_constant19), kwargs = {}) | |
%var_mean_3 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_8, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_6 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_3, 0), kwargs = {}) | |
%getitem_7 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_3, 1), kwargs = {}) | |
%add_9 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_6, 1e-05), kwargs = {}) | |
%rsqrt_3 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_9,), kwargs = {}) | |
%sub_3 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_8, %getitem_7), kwargs = {}) | |
%mul_9 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_3, %rsqrt_3), kwargs = {}) | |
%_param_constant20 : [#users=1] = get_attr[target=_param_constant20] | |
%mul_10 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_9, %_param_constant20), kwargs = {}) | |
%_param_constant21 : [#users=1] = get_attr[target=_param_constant21] | |
%add_10 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_10, %_param_constant21), kwargs = {}) | |
%_param_constant22 : [#users=1] = get_attr[target=_param_constant22] | |
%t_4 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant22,), kwargs = {}) | |
%view_9 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_10, [8192, 320]), kwargs = {}) | |
%mm : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_9, %t_4), kwargs = {}) | |
%_unsafe_view_1 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm, [2, 4096, 320]), kwargs = {}) | |
%view_10 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_1, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_1 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_10, [0, 2, 1, 3]), kwargs = {}) | |
%clone : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_1,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_2 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone, [10, 4096, 64]), kwargs = {}) | |
%_param_constant23 : [#users=1] = get_attr[target=_param_constant23] | |
%t_5 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant23,), kwargs = {}) | |
%view_11 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_10, [8192, 320]), kwargs = {}) | |
%mm_1 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_11, %t_5), kwargs = {}) | |
%_unsafe_view_3 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_1, [2, 4096, 320]), kwargs = {}) | |
%_param_constant24 : [#users=1] = get_attr[target=_param_constant24] | |
%t_6 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant24,), kwargs = {}) | |
%view_12 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_10, [8192, 320]), kwargs = {}) | |
%mm_2 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_12, %t_6), kwargs = {}) | |
%_unsafe_view_4 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_2, [2, 4096, 320]), kwargs = {}) | |
%view_13 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_3, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_2 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_13, [0, 2, 1, 3]), kwargs = {}) | |
%clone_1 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_2,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_5 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_1, [10, 4096, 64]), kwargs = {}) | |
%view_14 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_4, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_3 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_14, [0, 2, 1, 3]), kwargs = {}) | |
%clone_2 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_3,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_6 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_2, [10, 4096, 64]), kwargs = {}) | |
%empty : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([10, 4096, 4096],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_5, -1, -2), kwargs = {}) | |
%baddbmm : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty, %_unsafe_view_2, %transpose), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm, -1, False), kwargs = {}) | |
%detach_6 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax,), kwargs = {}) | |
%bmm_1 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax, %_unsafe_view_6), kwargs = {}) | |
%view_15 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_1, [2, 5, 4096, 64]), kwargs = {}) | |
%permute_4 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_15, [0, 2, 1, 3]), kwargs = {}) | |
%clone_3 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_4,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_7 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_3, [2, 4096, 320]), kwargs = {}) | |
%_param_constant25 : [#users=1] = get_attr[target=_param_constant25] | |
%t_7 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant25,), kwargs = {}) | |
%view_16 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_7, [8192, 320]), kwargs = {}) | |
%_param_constant26 : [#users=1] = get_attr[target=_param_constant26] | |
%addmm_3 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant26, %view_16, %t_7), kwargs = {}) | |
%view_17 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_3, [2, 4096, 320]), kwargs = {}) | |
%add_11 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_17, %add_8), kwargs = {}) | |
%var_mean_4 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_11, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_8 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_4, 0), kwargs = {}) | |
%getitem_9 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_4, 1), kwargs = {}) | |
%add_12 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_8, 1e-05), kwargs = {}) | |
%rsqrt_4 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_12,), kwargs = {}) | |
%sub_4 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_11, %getitem_9), kwargs = {}) | |
%mul_11 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_4, %rsqrt_4), kwargs = {}) | |
%_param_constant27 : [#users=1] = get_attr[target=_param_constant27] | |
%mul_12 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_11, %_param_constant27), kwargs = {}) | |
%_param_constant28 : [#users=1] = get_attr[target=_param_constant28] | |
%add_13 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_12, %_param_constant28), kwargs = {}) | |
%_param_constant29 : [#users=1] = get_attr[target=_param_constant29] | |
%t_8 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant29,), kwargs = {}) | |
%view_18 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_13, [8192, 320]), kwargs = {}) | |
%mm_3 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_18, %t_8), kwargs = {}) | |
%_unsafe_view_8 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_3, [2, 4096, 320]), kwargs = {}) | |
%view_19 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_8, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_5 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_19, [0, 2, 1, 3]), kwargs = {}) | |
%clone_4 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_5,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_9 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_4, [10, 4096, 64]), kwargs = {}) | |
%_param_constant30 : [#users=1] = get_attr[target=_param_constant30] | |
%t_9 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant30,), kwargs = {}) | |
%view_20 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_4 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_20, %t_9), kwargs = {}) | |
%_unsafe_view_10 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_4, [2, 64, 320]), kwargs = {}) | |
%_param_constant31 : [#users=1] = get_attr[target=_param_constant31] | |
%t_10 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant31,), kwargs = {}) | |
%view_21 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_5 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_21, %t_10), kwargs = {}) | |
%_unsafe_view_11 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_5, [2, 64, 320]), kwargs = {}) | |
%view_22 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_10, [2, 64, 5, 64]), kwargs = {}) | |
%permute_6 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_22, [0, 2, 1, 3]), kwargs = {}) | |
%clone_5 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_6,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_12 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_5, [10, 64, 64]), kwargs = {}) | |
%view_23 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_11, [2, 64, 5, 64]), kwargs = {}) | |
%permute_7 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_23, [0, 2, 1, 3]), kwargs = {}) | |
%clone_6 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_7,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_13 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_6, [10, 64, 64]), kwargs = {}) | |
%empty_1 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([10, 4096, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_1 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_12, -1, -2), kwargs = {}) | |
%baddbmm_1 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_1, %_unsafe_view_9, %transpose_1), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_1 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_1, -1, False), kwargs = {}) | |
%detach_7 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_1,), kwargs = {}) | |
%bmm_2 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_1, %_unsafe_view_13), kwargs = {}) | |
%view_24 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_2, [2, 5, 4096, 64]), kwargs = {}) | |
%permute_8 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_24, [0, 2, 1, 3]), kwargs = {}) | |
%clone_7 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_8,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_14 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_7, [2, 4096, 320]), kwargs = {}) | |
%_param_constant32 : [#users=1] = get_attr[target=_param_constant32] | |
%t_11 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant32,), kwargs = {}) | |
%view_25 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_14, [8192, 320]), kwargs = {}) | |
%_param_constant33 : [#users=1] = get_attr[target=_param_constant33] | |
%addmm_4 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant33, %view_25, %t_11), kwargs = {}) | |
%view_26 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_4, [2, 4096, 320]), kwargs = {}) | |
%add_14 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_26, %add_11), kwargs = {}) | |
%var_mean_5 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_14, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_10 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_5, 0), kwargs = {}) | |
%getitem_11 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_5, 1), kwargs = {}) | |
%add_15 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_10, 1e-05), kwargs = {}) | |
%rsqrt_5 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_15,), kwargs = {}) | |
%sub_5 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_14, %getitem_11), kwargs = {}) | |
%mul_13 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_5, %rsqrt_5), kwargs = {}) | |
%_param_constant34 : [#users=1] = get_attr[target=_param_constant34] | |
%mul_14 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_13, %_param_constant34), kwargs = {}) | |
%_param_constant35 : [#users=1] = get_attr[target=_param_constant35] | |
%add_16 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_14, %_param_constant35), kwargs = {}) | |
%_param_constant36 : [#users=1] = get_attr[target=_param_constant36] | |
%t_12 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant36,), kwargs = {}) | |
%view_27 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_16, [8192, 320]), kwargs = {}) | |
%_param_constant37 : [#users=1] = get_attr[target=_param_constant37] | |
%addmm_5 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant37, %view_27, %t_12), kwargs = {}) | |
%view_28 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_5, [2, 4096, 2560]), kwargs = {}) | |
%slice_9 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_28, -1, 0, 1280), kwargs = {}) | |
%slice_10 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_28, -1, 1280, 2560), kwargs = {}) | |
%gelu : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_10,), kwargs = {}) | |
%mul_15 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_9, %gelu), kwargs = {}) | |
%_param_constant38 : [#users=1] = get_attr[target=_param_constant38] | |
%t_13 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant38,), kwargs = {}) | |
%view_29 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_15, [8192, 1280]), kwargs = {}) | |
%_param_constant39 : [#users=1] = get_attr[target=_param_constant39] | |
%addmm_6 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant39, %view_29, %t_13), kwargs = {}) | |
%view_30 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_6, [2, 4096, 320]), kwargs = {}) | |
%add_17 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_30, %add_14), kwargs = {}) | |
%_param_constant40 : [#users=1] = get_attr[target=_param_constant40] | |
%t_14 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant40,), kwargs = {}) | |
%view_31 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_17, [8192, 320]), kwargs = {}) | |
%_param_constant41 : [#users=1] = get_attr[target=_param_constant41] | |
%addmm_7 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant41, %view_31, %t_14), kwargs = {}) | |
%view_32 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_7, [2, 4096, 320]), kwargs = {}) | |
%view_33 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_32, [2, 64, 64, 320]), kwargs = {}) | |
%permute_9 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_33, [0, 3, 1, 2]), kwargs = {}) | |
%clone_8 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_9,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_18 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%clone_8, %div_1), kwargs = {}) | |
%view_34 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_18, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_6 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_34, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_12 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_6, 0), kwargs = {}) | |
%getitem_13 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_6, 1), kwargs = {}) | |
%add_19 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_12, 1e-05), kwargs = {}) | |
%rsqrt_6 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_19,), kwargs = {}) | |
%sub_6 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_34, %getitem_13), kwargs = {}) | |
%mul_16 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_6, %rsqrt_6), kwargs = {}) | |
%view_35 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_16, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant42 : [#users=1] = get_attr[target=_param_constant42] | |
%unsqueeze_22 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant42, 0), kwargs = {}) | |
%unsqueeze_23 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_22, 2), kwargs = {}) | |
%unsqueeze_24 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_23, 3), kwargs = {}) | |
%_param_constant43 : [#users=1] = get_attr[target=_param_constant43] | |
%unsqueeze_25 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant43, 0), kwargs = {}) | |
%unsqueeze_26 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_25, 2), kwargs = {}) | |
%unsqueeze_27 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_26, 3), kwargs = {}) | |
%mul_17 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_35, %unsqueeze_27), kwargs = {}) | |
%add_20 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_17, %unsqueeze_24), kwargs = {}) | |
%squeeze_12 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_13, 3), kwargs = {}) | |
%squeeze_13 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_12, 2), kwargs = {}) | |
%squeeze_14 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_6, 3), kwargs = {}) | |
%squeeze_15 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_14, 2), kwargs = {}) | |
%detach_8 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_13,), kwargs = {}) | |
%detach_9 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_15,), kwargs = {}) | |
%silu_4 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_20,), kwargs = {}) | |
%_param_constant44 : [#users=1] = get_attr[target=_param_constant44] | |
%_param_constant45 : [#users=1] = get_attr[target=_param_constant45] | |
%convolution_3 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_4, %_param_constant44, %_param_constant45, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_5 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant46 : [#users=1] = get_attr[target=_param_constant46] | |
%t_15 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant46,), kwargs = {}) | |
%_param_constant47 : [#users=1] = get_attr[target=_param_constant47] | |
%addmm_8 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant47, %silu_5, %t_15), kwargs = {}) | |
%slice_11 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_8, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_12 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_11, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_28 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_12, 2), kwargs = {}) | |
%unsqueeze_29 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_28, 3), kwargs = {}) | |
%add_21 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_3, %unsqueeze_29), kwargs = {}) | |
%view_36 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_21, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_7 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_36, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_14 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_7, 0), kwargs = {}) | |
%getitem_15 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_7, 1), kwargs = {}) | |
%add_22 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_14, 1e-05), kwargs = {}) | |
%rsqrt_7 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_22,), kwargs = {}) | |
%sub_7 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_36, %getitem_15), kwargs = {}) | |
%mul_18 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_7, %rsqrt_7), kwargs = {}) | |
%view_37 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_18, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant48 : [#users=1] = get_attr[target=_param_constant48] | |
%unsqueeze_30 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant48, 0), kwargs = {}) | |
%unsqueeze_31 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_30, 2), kwargs = {}) | |
%unsqueeze_32 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_31, 3), kwargs = {}) | |
%_param_constant49 : [#users=1] = get_attr[target=_param_constant49] | |
%unsqueeze_33 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant49, 0), kwargs = {}) | |
%unsqueeze_34 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_33, 2), kwargs = {}) | |
%unsqueeze_35 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_34, 3), kwargs = {}) | |
%mul_19 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_37, %unsqueeze_35), kwargs = {}) | |
%add_23 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_19, %unsqueeze_32), kwargs = {}) | |
%squeeze_16 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_15, 3), kwargs = {}) | |
%squeeze_17 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_16, 2), kwargs = {}) | |
%squeeze_18 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_7, 3), kwargs = {}) | |
%squeeze_19 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_18, 2), kwargs = {}) | |
%detach_10 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_17,), kwargs = {}) | |
%detach_11 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_19,), kwargs = {}) | |
%silu_6 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_23,), kwargs = {}) | |
%_param_constant50 : [#users=1] = get_attr[target=_param_constant50] | |
%_param_constant51 : [#users=1] = get_attr[target=_param_constant51] | |
%convolution_4 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_6, %_param_constant50, %_param_constant51, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_24 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%add_18, %convolution_4), kwargs = {}) | |
%div_2 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_24, 1.0), kwargs = {}) | |
%view_38 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_2, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_8 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_38, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_16 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_8, 0), kwargs = {}) | |
%getitem_17 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_8, 1), kwargs = {}) | |
%add_25 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_16, 1e-06), kwargs = {}) | |
%rsqrt_8 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_25,), kwargs = {}) | |
%sub_8 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_38, %getitem_17), kwargs = {}) | |
%mul_20 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_8, %rsqrt_8), kwargs = {}) | |
%view_39 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_20, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant52 : [#users=1] = get_attr[target=_param_constant52] | |
%unsqueeze_36 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant52, 0), kwargs = {}) | |
%unsqueeze_37 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_36, 2), kwargs = {}) | |
%unsqueeze_38 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_37, 3), kwargs = {}) | |
%_param_constant53 : [#users=1] = get_attr[target=_param_constant53] | |
%unsqueeze_39 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant53, 0), kwargs = {}) | |
%unsqueeze_40 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_39, 2), kwargs = {}) | |
%unsqueeze_41 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_40, 3), kwargs = {}) | |
%mul_21 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_39, %unsqueeze_41), kwargs = {}) | |
%add_26 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_21, %unsqueeze_38), kwargs = {}) | |
%squeeze_20 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_17, 3), kwargs = {}) | |
%squeeze_21 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_20, 2), kwargs = {}) | |
%squeeze_22 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_8, 3), kwargs = {}) | |
%squeeze_23 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_22, 2), kwargs = {}) | |
%detach_12 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_21,), kwargs = {}) | |
%detach_13 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_23,), kwargs = {}) | |
%permute_10 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_26, [0, 2, 3, 1]), kwargs = {}) | |
%view_40 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_10, [2, 4096, 320]), kwargs = {}) | |
%_param_constant54 : [#users=1] = get_attr[target=_param_constant54] | |
%t_16 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant54,), kwargs = {}) | |
%expand_3 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_40, [2, 4096, 320]), kwargs = {}) | |
%view_41 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_3, [2, 4096, 320]), kwargs = {}) | |
%expand_4 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_16, [2, 320, 320]), kwargs = {}) | |
%view_42 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_4, [2, 320, 320]), kwargs = {}) | |
%bmm_3 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_41, %view_42), kwargs = {}) | |
%_unsafe_view_15 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_3, [2, 4096, 320]), kwargs = {}) | |
%_param_constant55 : [#users=1] = get_attr[target=_param_constant55] | |
%add_27 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_15, %_param_constant55), kwargs = {}) | |
%var_mean_9 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_27, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_18 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_9, 0), kwargs = {}) | |
%getitem_19 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_9, 1), kwargs = {}) | |
%add_28 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_18, 1e-05), kwargs = {}) | |
%rsqrt_9 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_28,), kwargs = {}) | |
%sub_9 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_27, %getitem_19), kwargs = {}) | |
%mul_22 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_9, %rsqrt_9), kwargs = {}) | |
%_param_constant56 : [#users=1] = get_attr[target=_param_constant56] | |
%mul_23 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_22, %_param_constant56), kwargs = {}) | |
%_param_constant57 : [#users=1] = get_attr[target=_param_constant57] | |
%add_29 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_23, %_param_constant57), kwargs = {}) | |
%_param_constant58 : [#users=1] = get_attr[target=_param_constant58] | |
%t_17 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant58,), kwargs = {}) | |
%view_43 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_29, [8192, 320]), kwargs = {}) | |
%mm_6 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_43, %t_17), kwargs = {}) | |
%_unsafe_view_16 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_6, [2, 4096, 320]), kwargs = {}) | |
%view_44 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_16, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_11 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_44, [0, 2, 1, 3]), kwargs = {}) | |
%clone_9 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_11,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_17 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_9, [10, 4096, 64]), kwargs = {}) | |
%_param_constant59 : [#users=1] = get_attr[target=_param_constant59] | |
%t_18 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant59,), kwargs = {}) | |
%view_45 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_29, [8192, 320]), kwargs = {}) | |
%mm_7 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_45, %t_18), kwargs = {}) | |
%_unsafe_view_18 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_7, [2, 4096, 320]), kwargs = {}) | |
%_param_constant60 : [#users=1] = get_attr[target=_param_constant60] | |
%t_19 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant60,), kwargs = {}) | |
%view_46 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_29, [8192, 320]), kwargs = {}) | |
%mm_8 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_46, %t_19), kwargs = {}) | |
%_unsafe_view_19 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_8, [2, 4096, 320]), kwargs = {}) | |
%view_47 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_18, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_12 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_47, [0, 2, 1, 3]), kwargs = {}) | |
%clone_10 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_12,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_20 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_10, [10, 4096, 64]), kwargs = {}) | |
%view_48 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_19, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_13 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_48, [0, 2, 1, 3]), kwargs = {}) | |
%clone_11 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_13,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_21 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_11, [10, 4096, 64]), kwargs = {}) | |
%empty_2 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([10, 4096, 4096],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_2 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_20, -1, -2), kwargs = {}) | |
%baddbmm_2 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_2, %_unsafe_view_17, %transpose_2), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_2 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_2, -1, False), kwargs = {}) | |
%detach_14 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_2,), kwargs = {}) | |
%bmm_4 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_2, %_unsafe_view_21), kwargs = {}) | |
%view_49 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_4, [2, 5, 4096, 64]), kwargs = {}) | |
%permute_14 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_49, [0, 2, 1, 3]), kwargs = {}) | |
%clone_12 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_14,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_22 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_12, [2, 4096, 320]), kwargs = {}) | |
%_param_constant61 : [#users=1] = get_attr[target=_param_constant61] | |
%t_20 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant61,), kwargs = {}) | |
%view_50 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_22, [8192, 320]), kwargs = {}) | |
%_param_constant62 : [#users=1] = get_attr[target=_param_constant62] | |
%addmm_9 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant62, %view_50, %t_20), kwargs = {}) | |
%view_51 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_9, [2, 4096, 320]), kwargs = {}) | |
%add_30 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_51, %add_27), kwargs = {}) | |
%var_mean_10 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_30, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_20 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_10, 0), kwargs = {}) | |
%getitem_21 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_10, 1), kwargs = {}) | |
%add_31 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_20, 1e-05), kwargs = {}) | |
%rsqrt_10 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_31,), kwargs = {}) | |
%sub_10 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_30, %getitem_21), kwargs = {}) | |
%mul_24 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_10, %rsqrt_10), kwargs = {}) | |
%_param_constant63 : [#users=1] = get_attr[target=_param_constant63] | |
%mul_25 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_24, %_param_constant63), kwargs = {}) | |
%_param_constant64 : [#users=1] = get_attr[target=_param_constant64] | |
%add_32 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_25, %_param_constant64), kwargs = {}) | |
%_param_constant65 : [#users=1] = get_attr[target=_param_constant65] | |
%t_21 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant65,), kwargs = {}) | |
%view_52 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_32, [8192, 320]), kwargs = {}) | |
%mm_9 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_52, %t_21), kwargs = {}) | |
%_unsafe_view_23 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_9, [2, 4096, 320]), kwargs = {}) | |
%view_53 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_23, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_15 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_53, [0, 2, 1, 3]), kwargs = {}) | |
%clone_13 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_15,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_24 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_13, [10, 4096, 64]), kwargs = {}) | |
%_param_constant66 : [#users=1] = get_attr[target=_param_constant66] | |
%t_22 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant66,), kwargs = {}) | |
%view_54 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_10 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_54, %t_22), kwargs = {}) | |
%_unsafe_view_25 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_10, [2, 64, 320]), kwargs = {}) | |
%_param_constant67 : [#users=1] = get_attr[target=_param_constant67] | |
%t_23 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant67,), kwargs = {}) | |
%view_55 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_11 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_55, %t_23), kwargs = {}) | |
%_unsafe_view_26 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_11, [2, 64, 320]), kwargs = {}) | |
%view_56 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_25, [2, 64, 5, 64]), kwargs = {}) | |
%permute_16 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_56, [0, 2, 1, 3]), kwargs = {}) | |
%clone_14 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_16,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_27 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_14, [10, 64, 64]), kwargs = {}) | |
%view_57 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_26, [2, 64, 5, 64]), kwargs = {}) | |
%permute_17 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_57, [0, 2, 1, 3]), kwargs = {}) | |
%clone_15 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_17,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_28 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_15, [10, 64, 64]), kwargs = {}) | |
%empty_3 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([10, 4096, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_3 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_27, -1, -2), kwargs = {}) | |
%baddbmm_3 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_3, %_unsafe_view_24, %transpose_3), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_3 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_3, -1, False), kwargs = {}) | |
%detach_15 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_3,), kwargs = {}) | |
%bmm_5 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_3, %_unsafe_view_28), kwargs = {}) | |
%view_58 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_5, [2, 5, 4096, 64]), kwargs = {}) | |
%permute_18 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_58, [0, 2, 1, 3]), kwargs = {}) | |
%clone_16 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_18,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_29 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_16, [2, 4096, 320]), kwargs = {}) | |
%_param_constant68 : [#users=1] = get_attr[target=_param_constant68] | |
%t_24 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant68,), kwargs = {}) | |
%view_59 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_29, [8192, 320]), kwargs = {}) | |
%_param_constant69 : [#users=1] = get_attr[target=_param_constant69] | |
%addmm_10 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant69, %view_59, %t_24), kwargs = {}) | |
%view_60 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_10, [2, 4096, 320]), kwargs = {}) | |
%add_33 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_60, %add_30), kwargs = {}) | |
%var_mean_11 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_33, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_22 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_11, 0), kwargs = {}) | |
%getitem_23 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_11, 1), kwargs = {}) | |
%add_34 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_22, 1e-05), kwargs = {}) | |
%rsqrt_11 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_34,), kwargs = {}) | |
%sub_11 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_33, %getitem_23), kwargs = {}) | |
%mul_26 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_11, %rsqrt_11), kwargs = {}) | |
%_param_constant70 : [#users=1] = get_attr[target=_param_constant70] | |
%mul_27 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_26, %_param_constant70), kwargs = {}) | |
%_param_constant71 : [#users=1] = get_attr[target=_param_constant71] | |
%add_35 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_27, %_param_constant71), kwargs = {}) | |
%_param_constant72 : [#users=1] = get_attr[target=_param_constant72] | |
%t_25 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant72,), kwargs = {}) | |
%view_61 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_35, [8192, 320]), kwargs = {}) | |
%_param_constant73 : [#users=1] = get_attr[target=_param_constant73] | |
%addmm_11 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant73, %view_61, %t_25), kwargs = {}) | |
%view_62 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_11, [2, 4096, 2560]), kwargs = {}) | |
%slice_13 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_62, -1, 0, 1280), kwargs = {}) | |
%slice_14 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_62, -1, 1280, 2560), kwargs = {}) | |
%gelu_1 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_14,), kwargs = {}) | |
%mul_28 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_13, %gelu_1), kwargs = {}) | |
%_param_constant74 : [#users=1] = get_attr[target=_param_constant74] | |
%t_26 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant74,), kwargs = {}) | |
%view_63 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_28, [8192, 1280]), kwargs = {}) | |
%_param_constant75 : [#users=1] = get_attr[target=_param_constant75] | |
%addmm_12 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant75, %view_63, %t_26), kwargs = {}) | |
%view_64 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_12, [2, 4096, 320]), kwargs = {}) | |
%add_36 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_64, %add_33), kwargs = {}) | |
%_param_constant76 : [#users=1] = get_attr[target=_param_constant76] | |
%t_27 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant76,), kwargs = {}) | |
%view_65 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_36, [8192, 320]), kwargs = {}) | |
%_param_constant77 : [#users=1] = get_attr[target=_param_constant77] | |
%addmm_13 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant77, %view_65, %t_27), kwargs = {}) | |
%view_66 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_13, [2, 4096, 320]), kwargs = {}) | |
%view_67 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_66, [2, 64, 64, 320]), kwargs = {}) | |
%permute_19 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_67, [0, 3, 1, 2]), kwargs = {}) | |
%clone_17 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_19,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_37 : [#users=2] = call_function[target=torch.ops.aten.add](args = (%clone_17, %div_2), kwargs = {}) | |
%_param_constant78 : [#users=1] = get_attr[target=_param_constant78] | |
%_param_constant79 : [#users=1] = get_attr[target=_param_constant79] | |
%convolution_5 : [#users=3] = call_function[target=torch.ops.aten.convolution](args = (%add_37, %_param_constant78, %_param_constant79, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%view_68 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%convolution_5, [2, 32, 10, 1024]), kwargs = {}) | |
%var_mean_12 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_68, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_24 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_12, 0), kwargs = {}) | |
%getitem_25 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_12, 1), kwargs = {}) | |
%add_38 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_24, 1e-05), kwargs = {}) | |
%rsqrt_12 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_38,), kwargs = {}) | |
%sub_12 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_68, %getitem_25), kwargs = {}) | |
%mul_29 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_12, %rsqrt_12), kwargs = {}) | |
%view_69 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_29, [2, 320, 32, 32]), kwargs = {}) | |
%_param_constant80 : [#users=1] = get_attr[target=_param_constant80] | |
%unsqueeze_42 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant80, 0), kwargs = {}) | |
%unsqueeze_43 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_42, 2), kwargs = {}) | |
%unsqueeze_44 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_43, 3), kwargs = {}) | |
%_param_constant81 : [#users=1] = get_attr[target=_param_constant81] | |
%unsqueeze_45 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant81, 0), kwargs = {}) | |
%unsqueeze_46 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_45, 2), kwargs = {}) | |
%unsqueeze_47 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_46, 3), kwargs = {}) | |
%mul_30 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_69, %unsqueeze_47), kwargs = {}) | |
%add_39 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_30, %unsqueeze_44), kwargs = {}) | |
%squeeze_24 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_25, 3), kwargs = {}) | |
%squeeze_25 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_24, 2), kwargs = {}) | |
%squeeze_26 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_12, 3), kwargs = {}) | |
%squeeze_27 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_26, 2), kwargs = {}) | |
%detach_16 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_25,), kwargs = {}) | |
%detach_17 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_27,), kwargs = {}) | |
%silu_7 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_39,), kwargs = {}) | |
%_param_constant82 : [#users=1] = get_attr[target=_param_constant82] | |
%_param_constant83 : [#users=1] = get_attr[target=_param_constant83] | |
%convolution_6 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_7, %_param_constant82, %_param_constant83, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_8 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant84 : [#users=1] = get_attr[target=_param_constant84] | |
%t_28 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant84,), kwargs = {}) | |
%_param_constant85 : [#users=1] = get_attr[target=_param_constant85] | |
%addmm_14 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant85, %silu_8, %t_28), kwargs = {}) | |
%slice_15 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_14, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_16 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_15, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_48 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_16, 2), kwargs = {}) | |
%unsqueeze_49 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_48, 3), kwargs = {}) | |
%add_40 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_6, %unsqueeze_49), kwargs = {}) | |
%view_70 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_40, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_13 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_70, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_26 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_13, 0), kwargs = {}) | |
%getitem_27 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_13, 1), kwargs = {}) | |
%add_41 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_26, 1e-05), kwargs = {}) | |
%rsqrt_13 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_41,), kwargs = {}) | |
%sub_13 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_70, %getitem_27), kwargs = {}) | |
%mul_31 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_13, %rsqrt_13), kwargs = {}) | |
%view_71 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_31, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant86 : [#users=1] = get_attr[target=_param_constant86] | |
%unsqueeze_50 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant86, 0), kwargs = {}) | |
%unsqueeze_51 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_50, 2), kwargs = {}) | |
%unsqueeze_52 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_51, 3), kwargs = {}) | |
%_param_constant87 : [#users=1] = get_attr[target=_param_constant87] | |
%unsqueeze_53 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant87, 0), kwargs = {}) | |
%unsqueeze_54 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_53, 2), kwargs = {}) | |
%unsqueeze_55 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_54, 3), kwargs = {}) | |
%mul_32 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_71, %unsqueeze_55), kwargs = {}) | |
%add_42 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_32, %unsqueeze_52), kwargs = {}) | |
%squeeze_28 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_27, 3), kwargs = {}) | |
%squeeze_29 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_28, 2), kwargs = {}) | |
%squeeze_30 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_13, 3), kwargs = {}) | |
%squeeze_31 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_30, 2), kwargs = {}) | |
%detach_18 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_29,), kwargs = {}) | |
%detach_19 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_31,), kwargs = {}) | |
%silu_9 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_42,), kwargs = {}) | |
%_param_constant88 : [#users=1] = get_attr[target=_param_constant88] | |
%_param_constant89 : [#users=1] = get_attr[target=_param_constant89] | |
%convolution_7 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_9, %_param_constant88, %_param_constant89, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant90 : [#users=1] = get_attr[target=_param_constant90] | |
%_param_constant91 : [#users=1] = get_attr[target=_param_constant91] | |
%convolution_8 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%convolution_5, %_param_constant90, %_param_constant91, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_43 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_8, %convolution_7), kwargs = {}) | |
%div_3 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_43, 1.0), kwargs = {}) | |
%view_72 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_3, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_14 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_72, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_28 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_14, 0), kwargs = {}) | |
%getitem_29 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_14, 1), kwargs = {}) | |
%add_44 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_28, 1e-06), kwargs = {}) | |
%rsqrt_14 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_44,), kwargs = {}) | |
%sub_14 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_72, %getitem_29), kwargs = {}) | |
%mul_33 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_14, %rsqrt_14), kwargs = {}) | |
%view_73 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_33, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant92 : [#users=1] = get_attr[target=_param_constant92] | |
%unsqueeze_56 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant92, 0), kwargs = {}) | |
%unsqueeze_57 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_56, 2), kwargs = {}) | |
%unsqueeze_58 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_57, 3), kwargs = {}) | |
%_param_constant93 : [#users=1] = get_attr[target=_param_constant93] | |
%unsqueeze_59 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant93, 0), kwargs = {}) | |
%unsqueeze_60 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_59, 2), kwargs = {}) | |
%unsqueeze_61 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_60, 3), kwargs = {}) | |
%mul_34 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_73, %unsqueeze_61), kwargs = {}) | |
%add_45 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_34, %unsqueeze_58), kwargs = {}) | |
%squeeze_32 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_29, 3), kwargs = {}) | |
%squeeze_33 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_32, 2), kwargs = {}) | |
%squeeze_34 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_14, 3), kwargs = {}) | |
%squeeze_35 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_34, 2), kwargs = {}) | |
%detach_20 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_33,), kwargs = {}) | |
%detach_21 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_35,), kwargs = {}) | |
%permute_20 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_45, [0, 2, 3, 1]), kwargs = {}) | |
%view_74 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_20, [2, 1024, 640]), kwargs = {}) | |
%_param_constant94 : [#users=1] = get_attr[target=_param_constant94] | |
%t_29 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant94,), kwargs = {}) | |
%expand_5 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_74, [2, 1024, 640]), kwargs = {}) | |
%view_75 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_5, [2, 1024, 640]), kwargs = {}) | |
%expand_6 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_29, [2, 640, 640]), kwargs = {}) | |
%view_76 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_6, [2, 640, 640]), kwargs = {}) | |
%bmm_6 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_75, %view_76), kwargs = {}) | |
%_unsafe_view_30 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_6, [2, 1024, 640]), kwargs = {}) | |
%_param_constant95 : [#users=1] = get_attr[target=_param_constant95] | |
%add_46 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_30, %_param_constant95), kwargs = {}) | |
%var_mean_15 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_46, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_30 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_15, 0), kwargs = {}) | |
%getitem_31 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_15, 1), kwargs = {}) | |
%add_47 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_30, 1e-05), kwargs = {}) | |
%rsqrt_15 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_47,), kwargs = {}) | |
%sub_15 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_46, %getitem_31), kwargs = {}) | |
%mul_35 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_15, %rsqrt_15), kwargs = {}) | |
%_param_constant96 : [#users=1] = get_attr[target=_param_constant96] | |
%mul_36 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_35, %_param_constant96), kwargs = {}) | |
%_param_constant97 : [#users=1] = get_attr[target=_param_constant97] | |
%add_48 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_36, %_param_constant97), kwargs = {}) | |
%_param_constant98 : [#users=1] = get_attr[target=_param_constant98] | |
%t_30 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant98,), kwargs = {}) | |
%view_77 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_48, [2048, 640]), kwargs = {}) | |
%mm_12 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_77, %t_30), kwargs = {}) | |
%_unsafe_view_31 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_12, [2, 1024, 640]), kwargs = {}) | |
%view_78 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_31, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_21 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_78, [0, 2, 1, 3]), kwargs = {}) | |
%clone_18 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_21,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_32 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_18, [20, 1024, 64]), kwargs = {}) | |
%_param_constant99 : [#users=1] = get_attr[target=_param_constant99] | |
%t_31 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant99,), kwargs = {}) | |
%view_79 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_48, [2048, 640]), kwargs = {}) | |
%mm_13 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_79, %t_31), kwargs = {}) | |
%_unsafe_view_33 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_13, [2, 1024, 640]), kwargs = {}) | |
%_param_constant100 : [#users=1] = get_attr[target=_param_constant100] | |
%t_32 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant100,), kwargs = {}) | |
%view_80 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_48, [2048, 640]), kwargs = {}) | |
%mm_14 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_80, %t_32), kwargs = {}) | |
%_unsafe_view_34 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_14, [2, 1024, 640]), kwargs = {}) | |
%view_81 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_33, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_22 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_81, [0, 2, 1, 3]), kwargs = {}) | |
%clone_19 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_22,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_35 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_19, [20, 1024, 64]), kwargs = {}) | |
%view_82 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_34, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_23 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_82, [0, 2, 1, 3]), kwargs = {}) | |
%clone_20 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_23,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_36 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_20, [20, 1024, 64]), kwargs = {}) | |
%empty_4 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([20, 1024, 1024],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_4 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_35, -1, -2), kwargs = {}) | |
%baddbmm_4 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_4, %_unsafe_view_32, %transpose_4), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_4 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_4, -1, False), kwargs = {}) | |
%detach_22 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_4,), kwargs = {}) | |
%bmm_7 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_4, %_unsafe_view_36), kwargs = {}) | |
%view_83 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_7, [2, 10, 1024, 64]), kwargs = {}) | |
%permute_24 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_83, [0, 2, 1, 3]), kwargs = {}) | |
%clone_21 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_24,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_37 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_21, [2, 1024, 640]), kwargs = {}) | |
%_param_constant101 : [#users=1] = get_attr[target=_param_constant101] | |
%t_33 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant101,), kwargs = {}) | |
%view_84 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_37, [2048, 640]), kwargs = {}) | |
%_param_constant102 : [#users=1] = get_attr[target=_param_constant102] | |
%addmm_15 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant102, %view_84, %t_33), kwargs = {}) | |
%view_85 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_15, [2, 1024, 640]), kwargs = {}) | |
%add_49 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_85, %add_46), kwargs = {}) | |
%var_mean_16 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_49, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_32 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_16, 0), kwargs = {}) | |
%getitem_33 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_16, 1), kwargs = {}) | |
%add_50 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_32, 1e-05), kwargs = {}) | |
%rsqrt_16 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_50,), kwargs = {}) | |
%sub_16 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_49, %getitem_33), kwargs = {}) | |
%mul_37 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_16, %rsqrt_16), kwargs = {}) | |
%_param_constant103 : [#users=1] = get_attr[target=_param_constant103] | |
%mul_38 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_37, %_param_constant103), kwargs = {}) | |
%_param_constant104 : [#users=1] = get_attr[target=_param_constant104] | |
%add_51 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_38, %_param_constant104), kwargs = {}) | |
%_param_constant105 : [#users=1] = get_attr[target=_param_constant105] | |
%t_34 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant105,), kwargs = {}) | |
%view_86 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_51, [2048, 640]), kwargs = {}) | |
%mm_15 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_86, %t_34), kwargs = {}) | |
%_unsafe_view_38 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_15, [2, 1024, 640]), kwargs = {}) | |
%view_87 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_38, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_25 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_87, [0, 2, 1, 3]), kwargs = {}) | |
%clone_22 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_25,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_39 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_22, [20, 1024, 64]), kwargs = {}) | |
%_param_constant106 : [#users=1] = get_attr[target=_param_constant106] | |
%t_35 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant106,), kwargs = {}) | |
%view_88 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_16 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_88, %t_35), kwargs = {}) | |
%_unsafe_view_40 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_16, [2, 64, 640]), kwargs = {}) | |
%_param_constant107 : [#users=1] = get_attr[target=_param_constant107] | |
%t_36 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant107,), kwargs = {}) | |
%view_89 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_17 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_89, %t_36), kwargs = {}) | |
%_unsafe_view_41 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_17, [2, 64, 640]), kwargs = {}) | |
%view_90 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_40, [2, 64, 10, 64]), kwargs = {}) | |
%permute_26 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_90, [0, 2, 1, 3]), kwargs = {}) | |
%clone_23 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_26,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_42 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_23, [20, 64, 64]), kwargs = {}) | |
%view_91 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_41, [2, 64, 10, 64]), kwargs = {}) | |
%permute_27 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_91, [0, 2, 1, 3]), kwargs = {}) | |
%clone_24 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_27,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_43 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_24, [20, 64, 64]), kwargs = {}) | |
%empty_5 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([20, 1024, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_5 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_42, -1, -2), kwargs = {}) | |
%baddbmm_5 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_5, %_unsafe_view_39, %transpose_5), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_5 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_5, -1, False), kwargs = {}) | |
%detach_23 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_5,), kwargs = {}) | |
%bmm_8 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_5, %_unsafe_view_43), kwargs = {}) | |
%view_92 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_8, [2, 10, 1024, 64]), kwargs = {}) | |
%permute_28 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_92, [0, 2, 1, 3]), kwargs = {}) | |
%clone_25 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_28,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_44 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_25, [2, 1024, 640]), kwargs = {}) | |
%_param_constant108 : [#users=1] = get_attr[target=_param_constant108] | |
%t_37 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant108,), kwargs = {}) | |
%view_93 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_44, [2048, 640]), kwargs = {}) | |
%_param_constant109 : [#users=1] = get_attr[target=_param_constant109] | |
%addmm_16 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant109, %view_93, %t_37), kwargs = {}) | |
%view_94 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_16, [2, 1024, 640]), kwargs = {}) | |
%add_52 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_94, %add_49), kwargs = {}) | |
%var_mean_17 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_52, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_34 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_17, 0), kwargs = {}) | |
%getitem_35 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_17, 1), kwargs = {}) | |
%add_53 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_34, 1e-05), kwargs = {}) | |
%rsqrt_17 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_53,), kwargs = {}) | |
%sub_17 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_52, %getitem_35), kwargs = {}) | |
%mul_39 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_17, %rsqrt_17), kwargs = {}) | |
%_param_constant110 : [#users=1] = get_attr[target=_param_constant110] | |
%mul_40 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_39, %_param_constant110), kwargs = {}) | |
%_param_constant111 : [#users=1] = get_attr[target=_param_constant111] | |
%add_54 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_40, %_param_constant111), kwargs = {}) | |
%_param_constant112 : [#users=1] = get_attr[target=_param_constant112] | |
%t_38 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant112,), kwargs = {}) | |
%view_95 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_54, [2048, 640]), kwargs = {}) | |
%_param_constant113 : [#users=1] = get_attr[target=_param_constant113] | |
%addmm_17 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant113, %view_95, %t_38), kwargs = {}) | |
%view_96 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_17, [2, 1024, 5120]), kwargs = {}) | |
%slice_17 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_96, -1, 0, 2560), kwargs = {}) | |
%slice_18 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_96, -1, 2560, 5120), kwargs = {}) | |
%gelu_2 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_18,), kwargs = {}) | |
%mul_41 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_17, %gelu_2), kwargs = {}) | |
%_param_constant114 : [#users=1] = get_attr[target=_param_constant114] | |
%t_39 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant114,), kwargs = {}) | |
%view_97 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_41, [2048, 2560]), kwargs = {}) | |
%_param_constant115 : [#users=1] = get_attr[target=_param_constant115] | |
%addmm_18 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant115, %view_97, %t_39), kwargs = {}) | |
%view_98 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_18, [2, 1024, 640]), kwargs = {}) | |
%add_55 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_98, %add_52), kwargs = {}) | |
%_param_constant116 : [#users=1] = get_attr[target=_param_constant116] | |
%t_40 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant116,), kwargs = {}) | |
%view_99 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_55, [2048, 640]), kwargs = {}) | |
%_param_constant117 : [#users=1] = get_attr[target=_param_constant117] | |
%addmm_19 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant117, %view_99, %t_40), kwargs = {}) | |
%view_100 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_19, [2, 1024, 640]), kwargs = {}) | |
%view_101 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_100, [2, 32, 32, 640]), kwargs = {}) | |
%permute_29 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_101, [0, 3, 1, 2]), kwargs = {}) | |
%clone_26 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_29,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_56 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%clone_26, %div_3), kwargs = {}) | |
%view_102 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_56, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_18 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_102, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_36 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_18, 0), kwargs = {}) | |
%getitem_37 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_18, 1), kwargs = {}) | |
%add_57 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_36, 1e-05), kwargs = {}) | |
%rsqrt_18 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_57,), kwargs = {}) | |
%sub_18 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_102, %getitem_37), kwargs = {}) | |
%mul_42 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_18, %rsqrt_18), kwargs = {}) | |
%view_103 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_42, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant118 : [#users=1] = get_attr[target=_param_constant118] | |
%unsqueeze_62 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant118, 0), kwargs = {}) | |
%unsqueeze_63 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_62, 2), kwargs = {}) | |
%unsqueeze_64 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_63, 3), kwargs = {}) | |
%_param_constant119 : [#users=1] = get_attr[target=_param_constant119] | |
%unsqueeze_65 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant119, 0), kwargs = {}) | |
%unsqueeze_66 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_65, 2), kwargs = {}) | |
%unsqueeze_67 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_66, 3), kwargs = {}) | |
%mul_43 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_103, %unsqueeze_67), kwargs = {}) | |
%add_58 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_43, %unsqueeze_64), kwargs = {}) | |
%squeeze_36 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_37, 3), kwargs = {}) | |
%squeeze_37 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_36, 2), kwargs = {}) | |
%squeeze_38 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_18, 3), kwargs = {}) | |
%squeeze_39 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_38, 2), kwargs = {}) | |
%detach_24 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_37,), kwargs = {}) | |
%detach_25 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_39,), kwargs = {}) | |
%silu_10 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_58,), kwargs = {}) | |
%_param_constant120 : [#users=1] = get_attr[target=_param_constant120] | |
%_param_constant121 : [#users=1] = get_attr[target=_param_constant121] | |
%convolution_9 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_10, %_param_constant120, %_param_constant121, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_11 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant122 : [#users=1] = get_attr[target=_param_constant122] | |
%t_41 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant122,), kwargs = {}) | |
%_param_constant123 : [#users=1] = get_attr[target=_param_constant123] | |
%addmm_20 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant123, %silu_11, %t_41), kwargs = {}) | |
%slice_19 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_20, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_20 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_19, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_68 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_20, 2), kwargs = {}) | |
%unsqueeze_69 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_68, 3), kwargs = {}) | |
%add_59 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_9, %unsqueeze_69), kwargs = {}) | |
%view_104 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_59, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_19 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_104, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_38 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_19, 0), kwargs = {}) | |
%getitem_39 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_19, 1), kwargs = {}) | |
%add_60 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_38, 1e-05), kwargs = {}) | |
%rsqrt_19 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_60,), kwargs = {}) | |
%sub_19 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_104, %getitem_39), kwargs = {}) | |
%mul_44 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_19, %rsqrt_19), kwargs = {}) | |
%view_105 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_44, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant124 : [#users=1] = get_attr[target=_param_constant124] | |
%unsqueeze_70 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant124, 0), kwargs = {}) | |
%unsqueeze_71 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_70, 2), kwargs = {}) | |
%unsqueeze_72 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_71, 3), kwargs = {}) | |
%_param_constant125 : [#users=1] = get_attr[target=_param_constant125] | |
%unsqueeze_73 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant125, 0), kwargs = {}) | |
%unsqueeze_74 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_73, 2), kwargs = {}) | |
%unsqueeze_75 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_74, 3), kwargs = {}) | |
%mul_45 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_105, %unsqueeze_75), kwargs = {}) | |
%add_61 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_45, %unsqueeze_72), kwargs = {}) | |
%squeeze_40 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_39, 3), kwargs = {}) | |
%squeeze_41 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_40, 2), kwargs = {}) | |
%squeeze_42 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_19, 3), kwargs = {}) | |
%squeeze_43 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_42, 2), kwargs = {}) | |
%detach_26 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_41,), kwargs = {}) | |
%detach_27 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_43,), kwargs = {}) | |
%silu_12 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_61,), kwargs = {}) | |
%_param_constant126 : [#users=1] = get_attr[target=_param_constant126] | |
%_param_constant127 : [#users=1] = get_attr[target=_param_constant127] | |
%convolution_10 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_12, %_param_constant126, %_param_constant127, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_62 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%add_56, %convolution_10), kwargs = {}) | |
%div_4 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_62, 1.0), kwargs = {}) | |
%view_106 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_4, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_20 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_106, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_40 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_20, 0), kwargs = {}) | |
%getitem_41 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_20, 1), kwargs = {}) | |
%add_63 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_40, 1e-06), kwargs = {}) | |
%rsqrt_20 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_63,), kwargs = {}) | |
%sub_20 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_106, %getitem_41), kwargs = {}) | |
%mul_46 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_20, %rsqrt_20), kwargs = {}) | |
%view_107 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_46, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant128 : [#users=1] = get_attr[target=_param_constant128] | |
%unsqueeze_76 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant128, 0), kwargs = {}) | |
%unsqueeze_77 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_76, 2), kwargs = {}) | |
%unsqueeze_78 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_77, 3), kwargs = {}) | |
%_param_constant129 : [#users=1] = get_attr[target=_param_constant129] | |
%unsqueeze_79 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant129, 0), kwargs = {}) | |
%unsqueeze_80 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_79, 2), kwargs = {}) | |
%unsqueeze_81 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_80, 3), kwargs = {}) | |
%mul_47 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_107, %unsqueeze_81), kwargs = {}) | |
%add_64 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_47, %unsqueeze_78), kwargs = {}) | |
%squeeze_44 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_41, 3), kwargs = {}) | |
%squeeze_45 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_44, 2), kwargs = {}) | |
%squeeze_46 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_20, 3), kwargs = {}) | |
%squeeze_47 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_46, 2), kwargs = {}) | |
%detach_28 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_45,), kwargs = {}) | |
%detach_29 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_47,), kwargs = {}) | |
%permute_30 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_64, [0, 2, 3, 1]), kwargs = {}) | |
%view_108 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_30, [2, 1024, 640]), kwargs = {}) | |
%_param_constant130 : [#users=1] = get_attr[target=_param_constant130] | |
%t_42 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant130,), kwargs = {}) | |
%expand_7 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_108, [2, 1024, 640]), kwargs = {}) | |
%view_109 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_7, [2, 1024, 640]), kwargs = {}) | |
%expand_8 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_42, [2, 640, 640]), kwargs = {}) | |
%view_110 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_8, [2, 640, 640]), kwargs = {}) | |
%bmm_9 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_109, %view_110), kwargs = {}) | |
%_unsafe_view_45 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_9, [2, 1024, 640]), kwargs = {}) | |
%_param_constant131 : [#users=1] = get_attr[target=_param_constant131] | |
%add_65 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_45, %_param_constant131), kwargs = {}) | |
%var_mean_21 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_65, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_42 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_21, 0), kwargs = {}) | |
%getitem_43 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_21, 1), kwargs = {}) | |
%add_66 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_42, 1e-05), kwargs = {}) | |
%rsqrt_21 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_66,), kwargs = {}) | |
%sub_21 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_65, %getitem_43), kwargs = {}) | |
%mul_48 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_21, %rsqrt_21), kwargs = {}) | |
%_param_constant132 : [#users=1] = get_attr[target=_param_constant132] | |
%mul_49 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_48, %_param_constant132), kwargs = {}) | |
%_param_constant133 : [#users=1] = get_attr[target=_param_constant133] | |
%add_67 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_49, %_param_constant133), kwargs = {}) | |
%_param_constant134 : [#users=1] = get_attr[target=_param_constant134] | |
%t_43 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant134,), kwargs = {}) | |
%view_111 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_67, [2048, 640]), kwargs = {}) | |
%mm_18 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_111, %t_43), kwargs = {}) | |
%_unsafe_view_46 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_18, [2, 1024, 640]), kwargs = {}) | |
%view_112 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_46, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_31 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_112, [0, 2, 1, 3]), kwargs = {}) | |
%clone_27 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_31,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_47 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_27, [20, 1024, 64]), kwargs = {}) | |
%_param_constant135 : [#users=1] = get_attr[target=_param_constant135] | |
%t_44 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant135,), kwargs = {}) | |
%view_113 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_67, [2048, 640]), kwargs = {}) | |
%mm_19 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_113, %t_44), kwargs = {}) | |
%_unsafe_view_48 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_19, [2, 1024, 640]), kwargs = {}) | |
%_param_constant136 : [#users=1] = get_attr[target=_param_constant136] | |
%t_45 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant136,), kwargs = {}) | |
%view_114 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_67, [2048, 640]), kwargs = {}) | |
%mm_20 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_114, %t_45), kwargs = {}) | |
%_unsafe_view_49 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_20, [2, 1024, 640]), kwargs = {}) | |
%view_115 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_48, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_32 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_115, [0, 2, 1, 3]), kwargs = {}) | |
%clone_28 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_32,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_50 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_28, [20, 1024, 64]), kwargs = {}) | |
%view_116 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_49, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_33 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_116, [0, 2, 1, 3]), kwargs = {}) | |
%clone_29 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_33,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_51 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_29, [20, 1024, 64]), kwargs = {}) | |
%empty_6 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([20, 1024, 1024],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_6 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_50, -1, -2), kwargs = {}) | |
%baddbmm_6 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_6, %_unsafe_view_47, %transpose_6), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_6 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_6, -1, False), kwargs = {}) | |
%detach_30 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_6,), kwargs = {}) | |
%bmm_10 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_6, %_unsafe_view_51), kwargs = {}) | |
%view_117 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_10, [2, 10, 1024, 64]), kwargs = {}) | |
%permute_34 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_117, [0, 2, 1, 3]), kwargs = {}) | |
%clone_30 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_34,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_52 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_30, [2, 1024, 640]), kwargs = {}) | |
%_param_constant137 : [#users=1] = get_attr[target=_param_constant137] | |
%t_46 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant137,), kwargs = {}) | |
%view_118 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_52, [2048, 640]), kwargs = {}) | |
%_param_constant138 : [#users=1] = get_attr[target=_param_constant138] | |
%addmm_21 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant138, %view_118, %t_46), kwargs = {}) | |
%view_119 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_21, [2, 1024, 640]), kwargs = {}) | |
%add_68 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_119, %add_65), kwargs = {}) | |
%var_mean_22 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_68, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_44 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_22, 0), kwargs = {}) | |
%getitem_45 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_22, 1), kwargs = {}) | |
%add_69 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_44, 1e-05), kwargs = {}) | |
%rsqrt_22 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_69,), kwargs = {}) | |
%sub_22 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_68, %getitem_45), kwargs = {}) | |
%mul_50 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_22, %rsqrt_22), kwargs = {}) | |
%_param_constant139 : [#users=1] = get_attr[target=_param_constant139] | |
%mul_51 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_50, %_param_constant139), kwargs = {}) | |
%_param_constant140 : [#users=1] = get_attr[target=_param_constant140] | |
%add_70 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_51, %_param_constant140), kwargs = {}) | |
%_param_constant141 : [#users=1] = get_attr[target=_param_constant141] | |
%t_47 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant141,), kwargs = {}) | |
%view_120 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_70, [2048, 640]), kwargs = {}) | |
%mm_21 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_120, %t_47), kwargs = {}) | |
%_unsafe_view_53 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_21, [2, 1024, 640]), kwargs = {}) | |
%view_121 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_53, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_35 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_121, [0, 2, 1, 3]), kwargs = {}) | |
%clone_31 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_35,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_54 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_31, [20, 1024, 64]), kwargs = {}) | |
%_param_constant142 : [#users=1] = get_attr[target=_param_constant142] | |
%t_48 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant142,), kwargs = {}) | |
%view_122 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_22 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_122, %t_48), kwargs = {}) | |
%_unsafe_view_55 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_22, [2, 64, 640]), kwargs = {}) | |
%_param_constant143 : [#users=1] = get_attr[target=_param_constant143] | |
%t_49 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant143,), kwargs = {}) | |
%view_123 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_23 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_123, %t_49), kwargs = {}) | |
%_unsafe_view_56 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_23, [2, 64, 640]), kwargs = {}) | |
%view_124 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_55, [2, 64, 10, 64]), kwargs = {}) | |
%permute_36 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_124, [0, 2, 1, 3]), kwargs = {}) | |
%clone_32 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_36,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_57 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_32, [20, 64, 64]), kwargs = {}) | |
%view_125 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_56, [2, 64, 10, 64]), kwargs = {}) | |
%permute_37 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_125, [0, 2, 1, 3]), kwargs = {}) | |
%clone_33 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_37,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_58 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_33, [20, 64, 64]), kwargs = {}) | |
%empty_7 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([20, 1024, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_7 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_57, -1, -2), kwargs = {}) | |
%baddbmm_7 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_7, %_unsafe_view_54, %transpose_7), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_7 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_7, -1, False), kwargs = {}) | |
%detach_31 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_7,), kwargs = {}) | |
%bmm_11 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_7, %_unsafe_view_58), kwargs = {}) | |
%view_126 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_11, [2, 10, 1024, 64]), kwargs = {}) | |
%permute_38 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_126, [0, 2, 1, 3]), kwargs = {}) | |
%clone_34 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_38,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_59 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_34, [2, 1024, 640]), kwargs = {}) | |
%_param_constant144 : [#users=1] = get_attr[target=_param_constant144] | |
%t_50 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant144,), kwargs = {}) | |
%view_127 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_59, [2048, 640]), kwargs = {}) | |
%_param_constant145 : [#users=1] = get_attr[target=_param_constant145] | |
%addmm_22 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant145, %view_127, %t_50), kwargs = {}) | |
%view_128 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_22, [2, 1024, 640]), kwargs = {}) | |
%add_71 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_128, %add_68), kwargs = {}) | |
%var_mean_23 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_71, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_46 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_23, 0), kwargs = {}) | |
%getitem_47 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_23, 1), kwargs = {}) | |
%add_72 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_46, 1e-05), kwargs = {}) | |
%rsqrt_23 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_72,), kwargs = {}) | |
%sub_23 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_71, %getitem_47), kwargs = {}) | |
%mul_52 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_23, %rsqrt_23), kwargs = {}) | |
%_param_constant146 : [#users=1] = get_attr[target=_param_constant146] | |
%mul_53 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_52, %_param_constant146), kwargs = {}) | |
%_param_constant147 : [#users=1] = get_attr[target=_param_constant147] | |
%add_73 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_53, %_param_constant147), kwargs = {}) | |
%_param_constant148 : [#users=1] = get_attr[target=_param_constant148] | |
%t_51 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant148,), kwargs = {}) | |
%view_129 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_73, [2048, 640]), kwargs = {}) | |
%_param_constant149 : [#users=1] = get_attr[target=_param_constant149] | |
%addmm_23 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant149, %view_129, %t_51), kwargs = {}) | |
%view_130 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_23, [2, 1024, 5120]), kwargs = {}) | |
%slice_21 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_130, -1, 0, 2560), kwargs = {}) | |
%slice_22 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_130, -1, 2560, 5120), kwargs = {}) | |
%gelu_3 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_22,), kwargs = {}) | |
%mul_54 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_21, %gelu_3), kwargs = {}) | |
%_param_constant150 : [#users=1] = get_attr[target=_param_constant150] | |
%t_52 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant150,), kwargs = {}) | |
%view_131 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_54, [2048, 2560]), kwargs = {}) | |
%_param_constant151 : [#users=1] = get_attr[target=_param_constant151] | |
%addmm_24 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant151, %view_131, %t_52), kwargs = {}) | |
%view_132 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_24, [2, 1024, 640]), kwargs = {}) | |
%add_74 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_132, %add_71), kwargs = {}) | |
%_param_constant152 : [#users=1] = get_attr[target=_param_constant152] | |
%t_53 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant152,), kwargs = {}) | |
%view_133 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_74, [2048, 640]), kwargs = {}) | |
%_param_constant153 : [#users=1] = get_attr[target=_param_constant153] | |
%addmm_25 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant153, %view_133, %t_53), kwargs = {}) | |
%view_134 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_25, [2, 1024, 640]), kwargs = {}) | |
%view_135 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_134, [2, 32, 32, 640]), kwargs = {}) | |
%permute_39 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_135, [0, 3, 1, 2]), kwargs = {}) | |
%clone_35 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_39,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_75 : [#users=2] = call_function[target=torch.ops.aten.add](args = (%clone_35, %div_4), kwargs = {}) | |
%_param_constant154 : [#users=1] = get_attr[target=_param_constant154] | |
%_param_constant155 : [#users=1] = get_attr[target=_param_constant155] | |
%convolution_11 : [#users=3] = call_function[target=torch.ops.aten.convolution](args = (%add_75, %_param_constant154, %_param_constant155, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%view_136 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%convolution_11, [2, 32, 20, 256]), kwargs = {}) | |
%var_mean_24 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_136, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_48 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_24, 0), kwargs = {}) | |
%getitem_49 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_24, 1), kwargs = {}) | |
%add_76 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_48, 1e-05), kwargs = {}) | |
%rsqrt_24 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_76,), kwargs = {}) | |
%sub_24 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_136, %getitem_49), kwargs = {}) | |
%mul_55 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_24, %rsqrt_24), kwargs = {}) | |
%view_137 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_55, [2, 640, 16, 16]), kwargs = {}) | |
%_param_constant156 : [#users=1] = get_attr[target=_param_constant156] | |
%unsqueeze_82 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant156, 0), kwargs = {}) | |
%unsqueeze_83 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_82, 2), kwargs = {}) | |
%unsqueeze_84 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_83, 3), kwargs = {}) | |
%_param_constant157 : [#users=1] = get_attr[target=_param_constant157] | |
%unsqueeze_85 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant157, 0), kwargs = {}) | |
%unsqueeze_86 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_85, 2), kwargs = {}) | |
%unsqueeze_87 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_86, 3), kwargs = {}) | |
%mul_56 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_137, %unsqueeze_87), kwargs = {}) | |
%add_77 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_56, %unsqueeze_84), kwargs = {}) | |
%squeeze_48 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_49, 3), kwargs = {}) | |
%squeeze_49 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_48, 2), kwargs = {}) | |
%squeeze_50 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_24, 3), kwargs = {}) | |
%squeeze_51 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_50, 2), kwargs = {}) | |
%detach_32 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_49,), kwargs = {}) | |
%detach_33 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_51,), kwargs = {}) | |
%silu_13 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_77,), kwargs = {}) | |
%_param_constant158 : [#users=1] = get_attr[target=_param_constant158] | |
%_param_constant159 : [#users=1] = get_attr[target=_param_constant159] | |
%convolution_12 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_13, %_param_constant158, %_param_constant159, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_14 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant160 : [#users=1] = get_attr[target=_param_constant160] | |
%t_54 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant160,), kwargs = {}) | |
%_param_constant161 : [#users=1] = get_attr[target=_param_constant161] | |
%addmm_26 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant161, %silu_14, %t_54), kwargs = {}) | |
%slice_23 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_26, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_24 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_23, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_88 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_24, 2), kwargs = {}) | |
%unsqueeze_89 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_88, 3), kwargs = {}) | |
%add_78 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_12, %unsqueeze_89), kwargs = {}) | |
%view_138 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_78, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_25 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_138, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_50 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_25, 0), kwargs = {}) | |
%getitem_51 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_25, 1), kwargs = {}) | |
%add_79 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_50, 1e-05), kwargs = {}) | |
%rsqrt_25 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_79,), kwargs = {}) | |
%sub_25 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_138, %getitem_51), kwargs = {}) | |
%mul_57 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_25, %rsqrt_25), kwargs = {}) | |
%view_139 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_57, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant162 : [#users=1] = get_attr[target=_param_constant162] | |
%unsqueeze_90 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant162, 0), kwargs = {}) | |
%unsqueeze_91 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_90, 2), kwargs = {}) | |
%unsqueeze_92 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_91, 3), kwargs = {}) | |
%_param_constant163 : [#users=1] = get_attr[target=_param_constant163] | |
%unsqueeze_93 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant163, 0), kwargs = {}) | |
%unsqueeze_94 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_93, 2), kwargs = {}) | |
%unsqueeze_95 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_94, 3), kwargs = {}) | |
%mul_58 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_139, %unsqueeze_95), kwargs = {}) | |
%add_80 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_58, %unsqueeze_92), kwargs = {}) | |
%squeeze_52 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_51, 3), kwargs = {}) | |
%squeeze_53 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_52, 2), kwargs = {}) | |
%squeeze_54 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_25, 3), kwargs = {}) | |
%squeeze_55 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_54, 2), kwargs = {}) | |
%detach_34 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_53,), kwargs = {}) | |
%detach_35 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_55,), kwargs = {}) | |
%silu_15 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_80,), kwargs = {}) | |
%_param_constant164 : [#users=1] = get_attr[target=_param_constant164] | |
%_param_constant165 : [#users=1] = get_attr[target=_param_constant165] | |
%convolution_13 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_15, %_param_constant164, %_param_constant165, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant166 : [#users=1] = get_attr[target=_param_constant166] | |
%_param_constant167 : [#users=1] = get_attr[target=_param_constant167] | |
%convolution_14 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%convolution_11, %_param_constant166, %_param_constant167, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_81 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_14, %convolution_13), kwargs = {}) | |
%div_5 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_81, 1.0), kwargs = {}) | |
%view_140 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_5, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_26 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_140, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_52 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_26, 0), kwargs = {}) | |
%getitem_53 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_26, 1), kwargs = {}) | |
%add_82 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_52, 1e-06), kwargs = {}) | |
%rsqrt_26 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_82,), kwargs = {}) | |
%sub_26 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_140, %getitem_53), kwargs = {}) | |
%mul_59 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_26, %rsqrt_26), kwargs = {}) | |
%view_141 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_59, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant168 : [#users=1] = get_attr[target=_param_constant168] | |
%unsqueeze_96 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant168, 0), kwargs = {}) | |
%unsqueeze_97 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_96, 2), kwargs = {}) | |
%unsqueeze_98 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_97, 3), kwargs = {}) | |
%_param_constant169 : [#users=1] = get_attr[target=_param_constant169] | |
%unsqueeze_99 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant169, 0), kwargs = {}) | |
%unsqueeze_100 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_99, 2), kwargs = {}) | |
%unsqueeze_101 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_100, 3), kwargs = {}) | |
%mul_60 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_141, %unsqueeze_101), kwargs = {}) | |
%add_83 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_60, %unsqueeze_98), kwargs = {}) | |
%squeeze_56 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_53, 3), kwargs = {}) | |
%squeeze_57 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_56, 2), kwargs = {}) | |
%squeeze_58 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_26, 3), kwargs = {}) | |
%squeeze_59 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_58, 2), kwargs = {}) | |
%detach_36 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_57,), kwargs = {}) | |
%detach_37 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_59,), kwargs = {}) | |
%permute_40 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_83, [0, 2, 3, 1]), kwargs = {}) | |
%view_142 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_40, [2, 256, 1280]), kwargs = {}) | |
%_param_constant170 : [#users=1] = get_attr[target=_param_constant170] | |
%t_55 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant170,), kwargs = {}) | |
%expand_9 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_142, [2, 256, 1280]), kwargs = {}) | |
%view_143 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_9, [2, 256, 1280]), kwargs = {}) | |
%expand_10 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_55, [2, 1280, 1280]), kwargs = {}) | |
%view_144 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_10, [2, 1280, 1280]), kwargs = {}) | |
%bmm_12 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_143, %view_144), kwargs = {}) | |
%_unsafe_view_60 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_12, [2, 256, 1280]), kwargs = {}) | |
%_param_constant171 : [#users=1] = get_attr[target=_param_constant171] | |
%add_84 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_60, %_param_constant171), kwargs = {}) | |
%var_mean_27 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_84, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_54 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_27, 0), kwargs = {}) | |
%getitem_55 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_27, 1), kwargs = {}) | |
%add_85 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_54, 1e-05), kwargs = {}) | |
%rsqrt_27 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_85,), kwargs = {}) | |
%sub_27 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_84, %getitem_55), kwargs = {}) | |
%mul_61 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_27, %rsqrt_27), kwargs = {}) | |
%_param_constant172 : [#users=1] = get_attr[target=_param_constant172] | |
%mul_62 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_61, %_param_constant172), kwargs = {}) | |
%_param_constant173 : [#users=1] = get_attr[target=_param_constant173] | |
%add_86 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_62, %_param_constant173), kwargs = {}) | |
%_param_constant174 : [#users=1] = get_attr[target=_param_constant174] | |
%t_56 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant174,), kwargs = {}) | |
%view_145 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_86, [512, 1280]), kwargs = {}) | |
%mm_24 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_145, %t_56), kwargs = {}) | |
%_unsafe_view_61 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_24, [2, 256, 1280]), kwargs = {}) | |
%view_146 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_61, [2, 256, 20, 64]), kwargs = {}) | |
%permute_41 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_146, [0, 2, 1, 3]), kwargs = {}) | |
%clone_36 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_41,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_62 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_36, [40, 256, 64]), kwargs = {}) | |
%_param_constant175 : [#users=1] = get_attr[target=_param_constant175] | |
%t_57 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant175,), kwargs = {}) | |
%view_147 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_86, [512, 1280]), kwargs = {}) | |
%mm_25 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_147, %t_57), kwargs = {}) | |
%_unsafe_view_63 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_25, [2, 256, 1280]), kwargs = {}) | |
%_param_constant176 : [#users=1] = get_attr[target=_param_constant176] | |
%t_58 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant176,), kwargs = {}) | |
%view_148 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_86, [512, 1280]), kwargs = {}) | |
%mm_26 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_148, %t_58), kwargs = {}) | |
%_unsafe_view_64 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_26, [2, 256, 1280]), kwargs = {}) | |
%view_149 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_63, [2, 256, 20, 64]), kwargs = {}) | |
%permute_42 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_149, [0, 2, 1, 3]), kwargs = {}) | |
%clone_37 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_42,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_65 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_37, [40, 256, 64]), kwargs = {}) | |
%view_150 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_64, [2, 256, 20, 64]), kwargs = {}) | |
%permute_43 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_150, [0, 2, 1, 3]), kwargs = {}) | |
%clone_38 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_43,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_66 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_38, [40, 256, 64]), kwargs = {}) | |
%empty_8 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 256, 256],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_8 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_65, -1, -2), kwargs = {}) | |
%baddbmm_8 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_8, %_unsafe_view_62, %transpose_8), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_8 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_8, -1, False), kwargs = {}) | |
%detach_38 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_8,), kwargs = {}) | |
%bmm_13 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_8, %_unsafe_view_66), kwargs = {}) | |
%view_151 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_13, [2, 20, 256, 64]), kwargs = {}) | |
%permute_44 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_151, [0, 2, 1, 3]), kwargs = {}) | |
%clone_39 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_44,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_67 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_39, [2, 256, 1280]), kwargs = {}) | |
%_param_constant177 : [#users=1] = get_attr[target=_param_constant177] | |
%t_59 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant177,), kwargs = {}) | |
%view_152 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_67, [512, 1280]), kwargs = {}) | |
%_param_constant178 : [#users=1] = get_attr[target=_param_constant178] | |
%addmm_27 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant178, %view_152, %t_59), kwargs = {}) | |
%view_153 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_27, [2, 256, 1280]), kwargs = {}) | |
%add_87 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_153, %add_84), kwargs = {}) | |
%var_mean_28 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_87, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_56 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_28, 0), kwargs = {}) | |
%getitem_57 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_28, 1), kwargs = {}) | |
%add_88 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_56, 1e-05), kwargs = {}) | |
%rsqrt_28 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_88,), kwargs = {}) | |
%sub_28 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_87, %getitem_57), kwargs = {}) | |
%mul_63 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_28, %rsqrt_28), kwargs = {}) | |
%_param_constant179 : [#users=1] = get_attr[target=_param_constant179] | |
%mul_64 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_63, %_param_constant179), kwargs = {}) | |
%_param_constant180 : [#users=1] = get_attr[target=_param_constant180] | |
%add_89 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_64, %_param_constant180), kwargs = {}) | |
%_param_constant181 : [#users=1] = get_attr[target=_param_constant181] | |
%t_60 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant181,), kwargs = {}) | |
%view_154 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_89, [512, 1280]), kwargs = {}) | |
%mm_27 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_154, %t_60), kwargs = {}) | |
%_unsafe_view_68 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_27, [2, 256, 1280]), kwargs = {}) | |
%view_155 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_68, [2, 256, 20, 64]), kwargs = {}) | |
%permute_45 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_155, [0, 2, 1, 3]), kwargs = {}) | |
%clone_40 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_45,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_69 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_40, [40, 256, 64]), kwargs = {}) | |
%_param_constant182 : [#users=1] = get_attr[target=_param_constant182] | |
%t_61 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant182,), kwargs = {}) | |
%view_156 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_28 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_156, %t_61), kwargs = {}) | |
%_unsafe_view_70 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_28, [2, 64, 1280]), kwargs = {}) | |
%_param_constant183 : [#users=1] = get_attr[target=_param_constant183] | |
%t_62 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant183,), kwargs = {}) | |
%view_157 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_29 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_157, %t_62), kwargs = {}) | |
%_unsafe_view_71 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_29, [2, 64, 1280]), kwargs = {}) | |
%view_158 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_70, [2, 64, 20, 64]), kwargs = {}) | |
%permute_46 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_158, [0, 2, 1, 3]), kwargs = {}) | |
%clone_41 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_46,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_72 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_41, [40, 64, 64]), kwargs = {}) | |
%view_159 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_71, [2, 64, 20, 64]), kwargs = {}) | |
%permute_47 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_159, [0, 2, 1, 3]), kwargs = {}) | |
%clone_42 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_47,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_73 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_42, [40, 64, 64]), kwargs = {}) | |
%empty_9 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 256, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_9 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_72, -1, -2), kwargs = {}) | |
%baddbmm_9 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_9, %_unsafe_view_69, %transpose_9), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_9 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_9, -1, False), kwargs = {}) | |
%detach_39 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_9,), kwargs = {}) | |
%bmm_14 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_9, %_unsafe_view_73), kwargs = {}) | |
%view_160 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_14, [2, 20, 256, 64]), kwargs = {}) | |
%permute_48 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_160, [0, 2, 1, 3]), kwargs = {}) | |
%clone_43 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_48,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_74 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_43, [2, 256, 1280]), kwargs = {}) | |
%_param_constant184 : [#users=1] = get_attr[target=_param_constant184] | |
%t_63 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant184,), kwargs = {}) | |
%view_161 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_74, [512, 1280]), kwargs = {}) | |
%_param_constant185 : [#users=1] = get_attr[target=_param_constant185] | |
%addmm_28 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant185, %view_161, %t_63), kwargs = {}) | |
%view_162 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_28, [2, 256, 1280]), kwargs = {}) | |
%add_90 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_162, %add_87), kwargs = {}) | |
%var_mean_29 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_90, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_58 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_29, 0), kwargs = {}) | |
%getitem_59 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_29, 1), kwargs = {}) | |
%add_91 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_58, 1e-05), kwargs = {}) | |
%rsqrt_29 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_91,), kwargs = {}) | |
%sub_29 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_90, %getitem_59), kwargs = {}) | |
%mul_65 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_29, %rsqrt_29), kwargs = {}) | |
%_param_constant186 : [#users=1] = get_attr[target=_param_constant186] | |
%mul_66 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_65, %_param_constant186), kwargs = {}) | |
%_param_constant187 : [#users=1] = get_attr[target=_param_constant187] | |
%add_92 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_66, %_param_constant187), kwargs = {}) | |
%_param_constant188 : [#users=1] = get_attr[target=_param_constant188] | |
%t_64 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant188,), kwargs = {}) | |
%view_163 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_92, [512, 1280]), kwargs = {}) | |
%_param_constant189 : [#users=1] = get_attr[target=_param_constant189] | |
%addmm_29 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant189, %view_163, %t_64), kwargs = {}) | |
%view_164 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_29, [2, 256, 10240]), kwargs = {}) | |
%slice_25 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_164, -1, 0, 5120), kwargs = {}) | |
%slice_26 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_164, -1, 5120, 10240), kwargs = {}) | |
%gelu_4 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_26,), kwargs = {}) | |
%mul_67 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_25, %gelu_4), kwargs = {}) | |
%_param_constant190 : [#users=1] = get_attr[target=_param_constant190] | |
%t_65 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant190,), kwargs = {}) | |
%view_165 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_67, [512, 5120]), kwargs = {}) | |
%_param_constant191 : [#users=1] = get_attr[target=_param_constant191] | |
%addmm_30 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant191, %view_165, %t_65), kwargs = {}) | |
%view_166 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_30, [2, 256, 1280]), kwargs = {}) | |
%add_93 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_166, %add_90), kwargs = {}) | |
%_param_constant192 : [#users=1] = get_attr[target=_param_constant192] | |
%t_66 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant192,), kwargs = {}) | |
%view_167 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_93, [512, 1280]), kwargs = {}) | |
%_param_constant193 : [#users=1] = get_attr[target=_param_constant193] | |
%addmm_31 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant193, %view_167, %t_66), kwargs = {}) | |
%view_168 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_31, [2, 256, 1280]), kwargs = {}) | |
%view_169 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_168, [2, 16, 16, 1280]), kwargs = {}) | |
%permute_49 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_169, [0, 3, 1, 2]), kwargs = {}) | |
%clone_44 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_49,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_94 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%clone_44, %div_5), kwargs = {}) | |
%view_170 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_94, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_30 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_170, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_60 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_30, 0), kwargs = {}) | |
%getitem_61 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_30, 1), kwargs = {}) | |
%add_95 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_60, 1e-05), kwargs = {}) | |
%rsqrt_30 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_95,), kwargs = {}) | |
%sub_30 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_170, %getitem_61), kwargs = {}) | |
%mul_68 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_30, %rsqrt_30), kwargs = {}) | |
%view_171 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_68, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant194 : [#users=1] = get_attr[target=_param_constant194] | |
%unsqueeze_102 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant194, 0), kwargs = {}) | |
%unsqueeze_103 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_102, 2), kwargs = {}) | |
%unsqueeze_104 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_103, 3), kwargs = {}) | |
%_param_constant195 : [#users=1] = get_attr[target=_param_constant195] | |
%unsqueeze_105 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant195, 0), kwargs = {}) | |
%unsqueeze_106 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_105, 2), kwargs = {}) | |
%unsqueeze_107 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_106, 3), kwargs = {}) | |
%mul_69 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_171, %unsqueeze_107), kwargs = {}) | |
%add_96 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_69, %unsqueeze_104), kwargs = {}) | |
%squeeze_60 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_61, 3), kwargs = {}) | |
%squeeze_61 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_60, 2), kwargs = {}) | |
%squeeze_62 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_30, 3), kwargs = {}) | |
%squeeze_63 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_62, 2), kwargs = {}) | |
%detach_40 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_61,), kwargs = {}) | |
%detach_41 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_63,), kwargs = {}) | |
%silu_16 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_96,), kwargs = {}) | |
%_param_constant196 : [#users=1] = get_attr[target=_param_constant196] | |
%_param_constant197 : [#users=1] = get_attr[target=_param_constant197] | |
%convolution_15 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_16, %_param_constant196, %_param_constant197, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_17 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant198 : [#users=1] = get_attr[target=_param_constant198] | |
%t_67 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant198,), kwargs = {}) | |
%_param_constant199 : [#users=1] = get_attr[target=_param_constant199] | |
%addmm_32 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant199, %silu_17, %t_67), kwargs = {}) | |
%slice_27 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_32, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_28 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_27, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_108 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_28, 2), kwargs = {}) | |
%unsqueeze_109 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_108, 3), kwargs = {}) | |
%add_97 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_15, %unsqueeze_109), kwargs = {}) | |
%view_172 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_97, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_31 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_172, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_62 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_31, 0), kwargs = {}) | |
%getitem_63 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_31, 1), kwargs = {}) | |
%add_98 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_62, 1e-05), kwargs = {}) | |
%rsqrt_31 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_98,), kwargs = {}) | |
%sub_31 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_172, %getitem_63), kwargs = {}) | |
%mul_70 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_31, %rsqrt_31), kwargs = {}) | |
%view_173 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_70, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant200 : [#users=1] = get_attr[target=_param_constant200] | |
%unsqueeze_110 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant200, 0), kwargs = {}) | |
%unsqueeze_111 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_110, 2), kwargs = {}) | |
%unsqueeze_112 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_111, 3), kwargs = {}) | |
%_param_constant201 : [#users=1] = get_attr[target=_param_constant201] | |
%unsqueeze_113 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant201, 0), kwargs = {}) | |
%unsqueeze_114 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_113, 2), kwargs = {}) | |
%unsqueeze_115 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_114, 3), kwargs = {}) | |
%mul_71 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_173, %unsqueeze_115), kwargs = {}) | |
%add_99 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_71, %unsqueeze_112), kwargs = {}) | |
%squeeze_64 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_63, 3), kwargs = {}) | |
%squeeze_65 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_64, 2), kwargs = {}) | |
%squeeze_66 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_31, 3), kwargs = {}) | |
%squeeze_67 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_66, 2), kwargs = {}) | |
%detach_42 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_65,), kwargs = {}) | |
%detach_43 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_67,), kwargs = {}) | |
%silu_18 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_99,), kwargs = {}) | |
%_param_constant202 : [#users=1] = get_attr[target=_param_constant202] | |
%_param_constant203 : [#users=1] = get_attr[target=_param_constant203] | |
%convolution_16 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_18, %_param_constant202, %_param_constant203, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_100 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%add_94, %convolution_16), kwargs = {}) | |
%div_6 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_100, 1.0), kwargs = {}) | |
%view_174 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_6, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_32 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_174, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_64 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_32, 0), kwargs = {}) | |
%getitem_65 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_32, 1), kwargs = {}) | |
%add_101 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_64, 1e-06), kwargs = {}) | |
%rsqrt_32 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_101,), kwargs = {}) | |
%sub_32 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_174, %getitem_65), kwargs = {}) | |
%mul_72 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_32, %rsqrt_32), kwargs = {}) | |
%view_175 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_72, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant204 : [#users=1] = get_attr[target=_param_constant204] | |
%unsqueeze_116 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant204, 0), kwargs = {}) | |
%unsqueeze_117 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_116, 2), kwargs = {}) | |
%unsqueeze_118 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_117, 3), kwargs = {}) | |
%_param_constant205 : [#users=1] = get_attr[target=_param_constant205] | |
%unsqueeze_119 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant205, 0), kwargs = {}) | |
%unsqueeze_120 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_119, 2), kwargs = {}) | |
%unsqueeze_121 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_120, 3), kwargs = {}) | |
%mul_73 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_175, %unsqueeze_121), kwargs = {}) | |
%add_102 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_73, %unsqueeze_118), kwargs = {}) | |
%squeeze_68 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_65, 3), kwargs = {}) | |
%squeeze_69 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_68, 2), kwargs = {}) | |
%squeeze_70 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_32, 3), kwargs = {}) | |
%squeeze_71 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_70, 2), kwargs = {}) | |
%detach_44 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_69,), kwargs = {}) | |
%detach_45 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_71,), kwargs = {}) | |
%permute_50 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_102, [0, 2, 3, 1]), kwargs = {}) | |
%view_176 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_50, [2, 256, 1280]), kwargs = {}) | |
%_param_constant206 : [#users=1] = get_attr[target=_param_constant206] | |
%t_68 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant206,), kwargs = {}) | |
%expand_11 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_176, [2, 256, 1280]), kwargs = {}) | |
%view_177 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_11, [2, 256, 1280]), kwargs = {}) | |
%expand_12 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_68, [2, 1280, 1280]), kwargs = {}) | |
%view_178 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_12, [2, 1280, 1280]), kwargs = {}) | |
%bmm_15 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_177, %view_178), kwargs = {}) | |
%_unsafe_view_75 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_15, [2, 256, 1280]), kwargs = {}) | |
%_param_constant207 : [#users=1] = get_attr[target=_param_constant207] | |
%add_103 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_75, %_param_constant207), kwargs = {}) | |
%var_mean_33 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_103, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_66 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_33, 0), kwargs = {}) | |
%getitem_67 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_33, 1), kwargs = {}) | |
%add_104 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_66, 1e-05), kwargs = {}) | |
%rsqrt_33 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_104,), kwargs = {}) | |
%sub_33 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_103, %getitem_67), kwargs = {}) | |
%mul_74 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_33, %rsqrt_33), kwargs = {}) | |
%_param_constant208 : [#users=1] = get_attr[target=_param_constant208] | |
%mul_75 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_74, %_param_constant208), kwargs = {}) | |
%_param_constant209 : [#users=1] = get_attr[target=_param_constant209] | |
%add_105 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_75, %_param_constant209), kwargs = {}) | |
%_param_constant210 : [#users=1] = get_attr[target=_param_constant210] | |
%t_69 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant210,), kwargs = {}) | |
%view_179 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_105, [512, 1280]), kwargs = {}) | |
%mm_30 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_179, %t_69), kwargs = {}) | |
%_unsafe_view_76 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_30, [2, 256, 1280]), kwargs = {}) | |
%view_180 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_76, [2, 256, 20, 64]), kwargs = {}) | |
%permute_51 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_180, [0, 2, 1, 3]), kwargs = {}) | |
%clone_45 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_51,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_77 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_45, [40, 256, 64]), kwargs = {}) | |
%_param_constant211 : [#users=1] = get_attr[target=_param_constant211] | |
%t_70 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant211,), kwargs = {}) | |
%view_181 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_105, [512, 1280]), kwargs = {}) | |
%mm_31 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_181, %t_70), kwargs = {}) | |
%_unsafe_view_78 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_31, [2, 256, 1280]), kwargs = {}) | |
%_param_constant212 : [#users=1] = get_attr[target=_param_constant212] | |
%t_71 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant212,), kwargs = {}) | |
%view_182 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_105, [512, 1280]), kwargs = {}) | |
%mm_32 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_182, %t_71), kwargs = {}) | |
%_unsafe_view_79 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_32, [2, 256, 1280]), kwargs = {}) | |
%view_183 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_78, [2, 256, 20, 64]), kwargs = {}) | |
%permute_52 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_183, [0, 2, 1, 3]), kwargs = {}) | |
%clone_46 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_52,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_80 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_46, [40, 256, 64]), kwargs = {}) | |
%view_184 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_79, [2, 256, 20, 64]), kwargs = {}) | |
%permute_53 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_184, [0, 2, 1, 3]), kwargs = {}) | |
%clone_47 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_53,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_81 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_47, [40, 256, 64]), kwargs = {}) | |
%empty_10 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 256, 256],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_10 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_80, -1, -2), kwargs = {}) | |
%baddbmm_10 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_10, %_unsafe_view_77, %transpose_10), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_10 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_10, -1, False), kwargs = {}) | |
%detach_46 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_10,), kwargs = {}) | |
%bmm_16 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_10, %_unsafe_view_81), kwargs = {}) | |
%view_185 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_16, [2, 20, 256, 64]), kwargs = {}) | |
%permute_54 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_185, [0, 2, 1, 3]), kwargs = {}) | |
%clone_48 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_54,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_82 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_48, [2, 256, 1280]), kwargs = {}) | |
%_param_constant213 : [#users=1] = get_attr[target=_param_constant213] | |
%t_72 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant213,), kwargs = {}) | |
%view_186 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_82, [512, 1280]), kwargs = {}) | |
%_param_constant214 : [#users=1] = get_attr[target=_param_constant214] | |
%addmm_33 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant214, %view_186, %t_72), kwargs = {}) | |
%view_187 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_33, [2, 256, 1280]), kwargs = {}) | |
%add_106 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_187, %add_103), kwargs = {}) | |
%var_mean_34 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_106, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_68 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_34, 0), kwargs = {}) | |
%getitem_69 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_34, 1), kwargs = {}) | |
%add_107 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_68, 1e-05), kwargs = {}) | |
%rsqrt_34 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_107,), kwargs = {}) | |
%sub_34 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_106, %getitem_69), kwargs = {}) | |
%mul_76 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_34, %rsqrt_34), kwargs = {}) | |
%_param_constant215 : [#users=1] = get_attr[target=_param_constant215] | |
%mul_77 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_76, %_param_constant215), kwargs = {}) | |
%_param_constant216 : [#users=1] = get_attr[target=_param_constant216] | |
%add_108 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_77, %_param_constant216), kwargs = {}) | |
%_param_constant217 : [#users=1] = get_attr[target=_param_constant217] | |
%t_73 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant217,), kwargs = {}) | |
%view_188 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_108, [512, 1280]), kwargs = {}) | |
%mm_33 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_188, %t_73), kwargs = {}) | |
%_unsafe_view_83 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_33, [2, 256, 1280]), kwargs = {}) | |
%view_189 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_83, [2, 256, 20, 64]), kwargs = {}) | |
%permute_55 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_189, [0, 2, 1, 3]), kwargs = {}) | |
%clone_49 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_55,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_84 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_49, [40, 256, 64]), kwargs = {}) | |
%_param_constant218 : [#users=1] = get_attr[target=_param_constant218] | |
%t_74 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant218,), kwargs = {}) | |
%view_190 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_34 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_190, %t_74), kwargs = {}) | |
%_unsafe_view_85 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_34, [2, 64, 1280]), kwargs = {}) | |
%_param_constant219 : [#users=1] = get_attr[target=_param_constant219] | |
%t_75 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant219,), kwargs = {}) | |
%view_191 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_35 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_191, %t_75), kwargs = {}) | |
%_unsafe_view_86 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_35, [2, 64, 1280]), kwargs = {}) | |
%view_192 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_85, [2, 64, 20, 64]), kwargs = {}) | |
%permute_56 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_192, [0, 2, 1, 3]), kwargs = {}) | |
%clone_50 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_56,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_87 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_50, [40, 64, 64]), kwargs = {}) | |
%view_193 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_86, [2, 64, 20, 64]), kwargs = {}) | |
%permute_57 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_193, [0, 2, 1, 3]), kwargs = {}) | |
%clone_51 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_57,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_88 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_51, [40, 64, 64]), kwargs = {}) | |
%empty_11 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 256, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_11 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_87, -1, -2), kwargs = {}) | |
%baddbmm_11 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_11, %_unsafe_view_84, %transpose_11), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_11 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_11, -1, False), kwargs = {}) | |
%detach_47 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_11,), kwargs = {}) | |
%bmm_17 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_11, %_unsafe_view_88), kwargs = {}) | |
%view_194 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_17, [2, 20, 256, 64]), kwargs = {}) | |
%permute_58 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_194, [0, 2, 1, 3]), kwargs = {}) | |
%clone_52 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_58,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_89 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_52, [2, 256, 1280]), kwargs = {}) | |
%_param_constant220 : [#users=1] = get_attr[target=_param_constant220] | |
%t_76 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant220,), kwargs = {}) | |
%view_195 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_89, [512, 1280]), kwargs = {}) | |
%_param_constant221 : [#users=1] = get_attr[target=_param_constant221] | |
%addmm_34 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant221, %view_195, %t_76), kwargs = {}) | |
%view_196 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_34, [2, 256, 1280]), kwargs = {}) | |
%add_109 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_196, %add_106), kwargs = {}) | |
%var_mean_35 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_109, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_70 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_35, 0), kwargs = {}) | |
%getitem_71 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_35, 1), kwargs = {}) | |
%add_110 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_70, 1e-05), kwargs = {}) | |
%rsqrt_35 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_110,), kwargs = {}) | |
%sub_35 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_109, %getitem_71), kwargs = {}) | |
%mul_78 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_35, %rsqrt_35), kwargs = {}) | |
%_param_constant222 : [#users=1] = get_attr[target=_param_constant222] | |
%mul_79 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_78, %_param_constant222), kwargs = {}) | |
%_param_constant223 : [#users=1] = get_attr[target=_param_constant223] | |
%add_111 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_79, %_param_constant223), kwargs = {}) | |
%_param_constant224 : [#users=1] = get_attr[target=_param_constant224] | |
%t_77 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant224,), kwargs = {}) | |
%view_197 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_111, [512, 1280]), kwargs = {}) | |
%_param_constant225 : [#users=1] = get_attr[target=_param_constant225] | |
%addmm_35 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant225, %view_197, %t_77), kwargs = {}) | |
%view_198 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_35, [2, 256, 10240]), kwargs = {}) | |
%slice_29 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_198, -1, 0, 5120), kwargs = {}) | |
%slice_30 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_198, -1, 5120, 10240), kwargs = {}) | |
%gelu_5 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_30,), kwargs = {}) | |
%mul_80 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_29, %gelu_5), kwargs = {}) | |
%_param_constant226 : [#users=1] = get_attr[target=_param_constant226] | |
%t_78 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant226,), kwargs = {}) | |
%view_199 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_80, [512, 5120]), kwargs = {}) | |
%_param_constant227 : [#users=1] = get_attr[target=_param_constant227] | |
%addmm_36 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant227, %view_199, %t_78), kwargs = {}) | |
%view_200 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_36, [2, 256, 1280]), kwargs = {}) | |
%add_112 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_200, %add_109), kwargs = {}) | |
%_param_constant228 : [#users=1] = get_attr[target=_param_constant228] | |
%t_79 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant228,), kwargs = {}) | |
%view_201 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_112, [512, 1280]), kwargs = {}) | |
%_param_constant229 : [#users=1] = get_attr[target=_param_constant229] | |
%addmm_37 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant229, %view_201, %t_79), kwargs = {}) | |
%view_202 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_37, [2, 256, 1280]), kwargs = {}) | |
%view_203 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_202, [2, 16, 16, 1280]), kwargs = {}) | |
%permute_59 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_203, [0, 3, 1, 2]), kwargs = {}) | |
%clone_53 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_59,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_113 : [#users=2] = call_function[target=torch.ops.aten.add](args = (%clone_53, %div_6), kwargs = {}) | |
%_param_constant230 : [#users=1] = get_attr[target=_param_constant230] | |
%_param_constant231 : [#users=1] = get_attr[target=_param_constant231] | |
%convolution_17 : [#users=3] = call_function[target=torch.ops.aten.convolution](args = (%add_113, %_param_constant230, %_param_constant231, [2, 2], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%view_204 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%convolution_17, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_36 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_204, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_72 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_36, 0), kwargs = {}) | |
%getitem_73 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_36, 1), kwargs = {}) | |
%add_114 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_72, 1e-05), kwargs = {}) | |
%rsqrt_36 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_114,), kwargs = {}) | |
%sub_36 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_204, %getitem_73), kwargs = {}) | |
%mul_81 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_36, %rsqrt_36), kwargs = {}) | |
%view_205 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_81, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant232 : [#users=1] = get_attr[target=_param_constant232] | |
%unsqueeze_122 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant232, 0), kwargs = {}) | |
%unsqueeze_123 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_122, 2), kwargs = {}) | |
%unsqueeze_124 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_123, 3), kwargs = {}) | |
%_param_constant233 : [#users=1] = get_attr[target=_param_constant233] | |
%unsqueeze_125 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant233, 0), kwargs = {}) | |
%unsqueeze_126 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_125, 2), kwargs = {}) | |
%unsqueeze_127 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_126, 3), kwargs = {}) | |
%mul_82 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_205, %unsqueeze_127), kwargs = {}) | |
%add_115 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_82, %unsqueeze_124), kwargs = {}) | |
%squeeze_72 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_73, 3), kwargs = {}) | |
%squeeze_73 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_72, 2), kwargs = {}) | |
%squeeze_74 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_36, 3), kwargs = {}) | |
%squeeze_75 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_74, 2), kwargs = {}) | |
%detach_48 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_73,), kwargs = {}) | |
%detach_49 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_75,), kwargs = {}) | |
%silu_19 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_115,), kwargs = {}) | |
%_param_constant234 : [#users=1] = get_attr[target=_param_constant234] | |
%_param_constant235 : [#users=1] = get_attr[target=_param_constant235] | |
%convolution_18 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_19, %_param_constant234, %_param_constant235, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_20 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant236 : [#users=1] = get_attr[target=_param_constant236] | |
%t_80 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant236,), kwargs = {}) | |
%_param_constant237 : [#users=1] = get_attr[target=_param_constant237] | |
%addmm_38 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant237, %silu_20, %t_80), kwargs = {}) | |
%slice_31 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_38, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_32 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_31, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_128 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_32, 2), kwargs = {}) | |
%unsqueeze_129 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_128, 3), kwargs = {}) | |
%add_116 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_18, %unsqueeze_129), kwargs = {}) | |
%view_206 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_116, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_37 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_206, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_74 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_37, 0), kwargs = {}) | |
%getitem_75 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_37, 1), kwargs = {}) | |
%add_117 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_74, 1e-05), kwargs = {}) | |
%rsqrt_37 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_117,), kwargs = {}) | |
%sub_37 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_206, %getitem_75), kwargs = {}) | |
%mul_83 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_37, %rsqrt_37), kwargs = {}) | |
%view_207 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_83, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant238 : [#users=1] = get_attr[target=_param_constant238] | |
%unsqueeze_130 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant238, 0), kwargs = {}) | |
%unsqueeze_131 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_130, 2), kwargs = {}) | |
%unsqueeze_132 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_131, 3), kwargs = {}) | |
%_param_constant239 : [#users=1] = get_attr[target=_param_constant239] | |
%unsqueeze_133 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant239, 0), kwargs = {}) | |
%unsqueeze_134 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_133, 2), kwargs = {}) | |
%unsqueeze_135 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_134, 3), kwargs = {}) | |
%mul_84 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_207, %unsqueeze_135), kwargs = {}) | |
%add_118 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_84, %unsqueeze_132), kwargs = {}) | |
%squeeze_76 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_75, 3), kwargs = {}) | |
%squeeze_77 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_76, 2), kwargs = {}) | |
%squeeze_78 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_37, 3), kwargs = {}) | |
%squeeze_79 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_78, 2), kwargs = {}) | |
%detach_50 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_77,), kwargs = {}) | |
%detach_51 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_79,), kwargs = {}) | |
%silu_21 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_118,), kwargs = {}) | |
%_param_constant240 : [#users=1] = get_attr[target=_param_constant240] | |
%_param_constant241 : [#users=1] = get_attr[target=_param_constant241] | |
%convolution_19 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_21, %_param_constant240, %_param_constant241, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_119 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_17, %convolution_19), kwargs = {}) | |
%div_7 : [#users=3] = call_function[target=torch.ops.aten.div](args = (%add_119, 1.0), kwargs = {}) | |
%view_208 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_7, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_38 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_208, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_76 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_38, 0), kwargs = {}) | |
%getitem_77 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_38, 1), kwargs = {}) | |
%add_120 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_76, 1e-05), kwargs = {}) | |
%rsqrt_38 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_120,), kwargs = {}) | |
%sub_38 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_208, %getitem_77), kwargs = {}) | |
%mul_85 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_38, %rsqrt_38), kwargs = {}) | |
%view_209 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_85, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant242 : [#users=1] = get_attr[target=_param_constant242] | |
%unsqueeze_136 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant242, 0), kwargs = {}) | |
%unsqueeze_137 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_136, 2), kwargs = {}) | |
%unsqueeze_138 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_137, 3), kwargs = {}) | |
%_param_constant243 : [#users=1] = get_attr[target=_param_constant243] | |
%unsqueeze_139 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant243, 0), kwargs = {}) | |
%unsqueeze_140 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_139, 2), kwargs = {}) | |
%unsqueeze_141 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_140, 3), kwargs = {}) | |
%mul_86 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_209, %unsqueeze_141), kwargs = {}) | |
%add_121 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_86, %unsqueeze_138), kwargs = {}) | |
%squeeze_80 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_77, 3), kwargs = {}) | |
%squeeze_81 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_80, 2), kwargs = {}) | |
%squeeze_82 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_38, 3), kwargs = {}) | |
%squeeze_83 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_82, 2), kwargs = {}) | |
%detach_52 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_81,), kwargs = {}) | |
%detach_53 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_83,), kwargs = {}) | |
%silu_22 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_121,), kwargs = {}) | |
%_param_constant244 : [#users=1] = get_attr[target=_param_constant244] | |
%_param_constant245 : [#users=1] = get_attr[target=_param_constant245] | |
%convolution_20 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_22, %_param_constant244, %_param_constant245, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_23 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant246 : [#users=1] = get_attr[target=_param_constant246] | |
%t_81 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant246,), kwargs = {}) | |
%_param_constant247 : [#users=1] = get_attr[target=_param_constant247] | |
%addmm_39 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant247, %silu_23, %t_81), kwargs = {}) | |
%slice_33 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_39, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_34 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_33, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_142 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_34, 2), kwargs = {}) | |
%unsqueeze_143 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_142, 3), kwargs = {}) | |
%add_122 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_20, %unsqueeze_143), kwargs = {}) | |
%view_210 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_122, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_39 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_210, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_78 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_39, 0), kwargs = {}) | |
%getitem_79 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_39, 1), kwargs = {}) | |
%add_123 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_78, 1e-05), kwargs = {}) | |
%rsqrt_39 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_123,), kwargs = {}) | |
%sub_39 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_210, %getitem_79), kwargs = {}) | |
%mul_87 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_39, %rsqrt_39), kwargs = {}) | |
%view_211 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_87, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant248 : [#users=1] = get_attr[target=_param_constant248] | |
%unsqueeze_144 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant248, 0), kwargs = {}) | |
%unsqueeze_145 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_144, 2), kwargs = {}) | |
%unsqueeze_146 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_145, 3), kwargs = {}) | |
%_param_constant249 : [#users=1] = get_attr[target=_param_constant249] | |
%unsqueeze_147 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant249, 0), kwargs = {}) | |
%unsqueeze_148 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_147, 2), kwargs = {}) | |
%unsqueeze_149 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_148, 3), kwargs = {}) | |
%mul_88 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_211, %unsqueeze_149), kwargs = {}) | |
%add_124 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_88, %unsqueeze_146), kwargs = {}) | |
%squeeze_84 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_79, 3), kwargs = {}) | |
%squeeze_85 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_84, 2), kwargs = {}) | |
%squeeze_86 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_39, 3), kwargs = {}) | |
%squeeze_87 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_86, 2), kwargs = {}) | |
%detach_54 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_85,), kwargs = {}) | |
%detach_55 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_87,), kwargs = {}) | |
%silu_24 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_124,), kwargs = {}) | |
%_param_constant250 : [#users=1] = get_attr[target=_param_constant250] | |
%_param_constant251 : [#users=1] = get_attr[target=_param_constant251] | |
%convolution_21 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_24, %_param_constant250, %_param_constant251, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_125 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%div_7, %convolution_21), kwargs = {}) | |
%div_8 : [#users=3] = call_function[target=torch.ops.aten.div](args = (%add_125, 1.0), kwargs = {}) | |
%view_212 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_8, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_40 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_212, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_80 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_40, 0), kwargs = {}) | |
%getitem_81 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_40, 1), kwargs = {}) | |
%add_126 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_80, 1e-05), kwargs = {}) | |
%rsqrt_40 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_126,), kwargs = {}) | |
%sub_40 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_212, %getitem_81), kwargs = {}) | |
%mul_89 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_40, %rsqrt_40), kwargs = {}) | |
%view_213 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_89, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant252 : [#users=1] = get_attr[target=_param_constant252] | |
%unsqueeze_150 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant252, 0), kwargs = {}) | |
%unsqueeze_151 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_150, 2), kwargs = {}) | |
%unsqueeze_152 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_151, 3), kwargs = {}) | |
%_param_constant253 : [#users=1] = get_attr[target=_param_constant253] | |
%unsqueeze_153 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant253, 0), kwargs = {}) | |
%unsqueeze_154 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_153, 2), kwargs = {}) | |
%unsqueeze_155 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_154, 3), kwargs = {}) | |
%mul_90 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_213, %unsqueeze_155), kwargs = {}) | |
%add_127 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_90, %unsqueeze_152), kwargs = {}) | |
%squeeze_88 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_81, 3), kwargs = {}) | |
%squeeze_89 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_88, 2), kwargs = {}) | |
%squeeze_90 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_40, 3), kwargs = {}) | |
%squeeze_91 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_90, 2), kwargs = {}) | |
%detach_56 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_89,), kwargs = {}) | |
%detach_57 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_91,), kwargs = {}) | |
%silu_25 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_127,), kwargs = {}) | |
%_param_constant254 : [#users=1] = get_attr[target=_param_constant254] | |
%_param_constant255 : [#users=1] = get_attr[target=_param_constant255] | |
%convolution_22 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_25, %_param_constant254, %_param_constant255, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_26 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant256 : [#users=1] = get_attr[target=_param_constant256] | |
%t_82 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant256,), kwargs = {}) | |
%_param_constant257 : [#users=1] = get_attr[target=_param_constant257] | |
%addmm_40 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant257, %silu_26, %t_82), kwargs = {}) | |
%slice_35 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_40, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_36 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_35, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_156 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_36, 2), kwargs = {}) | |
%unsqueeze_157 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_156, 3), kwargs = {}) | |
%add_128 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_22, %unsqueeze_157), kwargs = {}) | |
%view_214 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_128, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_41 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_214, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_82 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_41, 0), kwargs = {}) | |
%getitem_83 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_41, 1), kwargs = {}) | |
%add_129 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_82, 1e-05), kwargs = {}) | |
%rsqrt_41 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_129,), kwargs = {}) | |
%sub_41 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_214, %getitem_83), kwargs = {}) | |
%mul_91 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_41, %rsqrt_41), kwargs = {}) | |
%view_215 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_91, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant258 : [#users=1] = get_attr[target=_param_constant258] | |
%unsqueeze_158 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant258, 0), kwargs = {}) | |
%unsqueeze_159 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_158, 2), kwargs = {}) | |
%unsqueeze_160 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_159, 3), kwargs = {}) | |
%_param_constant259 : [#users=1] = get_attr[target=_param_constant259] | |
%unsqueeze_161 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant259, 0), kwargs = {}) | |
%unsqueeze_162 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_161, 2), kwargs = {}) | |
%unsqueeze_163 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_162, 3), kwargs = {}) | |
%mul_92 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_215, %unsqueeze_163), kwargs = {}) | |
%add_130 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_92, %unsqueeze_160), kwargs = {}) | |
%squeeze_92 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_83, 3), kwargs = {}) | |
%squeeze_93 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_92, 2), kwargs = {}) | |
%squeeze_94 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_41, 3), kwargs = {}) | |
%squeeze_95 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_94, 2), kwargs = {}) | |
%detach_58 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_93,), kwargs = {}) | |
%detach_59 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_95,), kwargs = {}) | |
%silu_27 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_130,), kwargs = {}) | |
%_param_constant260 : [#users=1] = get_attr[target=_param_constant260] | |
%_param_constant261 : [#users=1] = get_attr[target=_param_constant261] | |
%convolution_23 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_27, %_param_constant260, %_param_constant261, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_131 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%div_8, %convolution_23), kwargs = {}) | |
%div_9 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_131, 1), kwargs = {}) | |
%view_216 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_9, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_42 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_216, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_84 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_42, 0), kwargs = {}) | |
%getitem_85 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_42, 1), kwargs = {}) | |
%add_132 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_84, 1e-06), kwargs = {}) | |
%rsqrt_42 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_132,), kwargs = {}) | |
%sub_42 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_216, %getitem_85), kwargs = {}) | |
%mul_93 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_42, %rsqrt_42), kwargs = {}) | |
%view_217 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_93, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant262 : [#users=1] = get_attr[target=_param_constant262] | |
%unsqueeze_164 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant262, 0), kwargs = {}) | |
%unsqueeze_165 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_164, 2), kwargs = {}) | |
%unsqueeze_166 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_165, 3), kwargs = {}) | |
%_param_constant263 : [#users=1] = get_attr[target=_param_constant263] | |
%unsqueeze_167 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant263, 0), kwargs = {}) | |
%unsqueeze_168 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_167, 2), kwargs = {}) | |
%unsqueeze_169 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_168, 3), kwargs = {}) | |
%mul_94 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_217, %unsqueeze_169), kwargs = {}) | |
%add_133 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_94, %unsqueeze_166), kwargs = {}) | |
%squeeze_96 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_85, 3), kwargs = {}) | |
%squeeze_97 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_96, 2), kwargs = {}) | |
%squeeze_98 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_42, 3), kwargs = {}) | |
%squeeze_99 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_98, 2), kwargs = {}) | |
%detach_60 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_97,), kwargs = {}) | |
%detach_61 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_99,), kwargs = {}) | |
%permute_60 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_133, [0, 2, 3, 1]), kwargs = {}) | |
%view_218 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_60, [2, 64, 1280]), kwargs = {}) | |
%_param_constant264 : [#users=1] = get_attr[target=_param_constant264] | |
%t_83 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant264,), kwargs = {}) | |
%expand_13 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_218, [2, 64, 1280]), kwargs = {}) | |
%view_219 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_13, [2, 64, 1280]), kwargs = {}) | |
%expand_14 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_83, [2, 1280, 1280]), kwargs = {}) | |
%view_220 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_14, [2, 1280, 1280]), kwargs = {}) | |
%bmm_18 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_219, %view_220), kwargs = {}) | |
%_unsafe_view_90 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_18, [2, 64, 1280]), kwargs = {}) | |
%_param_constant265 : [#users=1] = get_attr[target=_param_constant265] | |
%add_134 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_90, %_param_constant265), kwargs = {}) | |
%var_mean_43 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_134, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_86 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_43, 0), kwargs = {}) | |
%getitem_87 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_43, 1), kwargs = {}) | |
%add_135 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_86, 1e-05), kwargs = {}) | |
%rsqrt_43 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_135,), kwargs = {}) | |
%sub_43 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_134, %getitem_87), kwargs = {}) | |
%mul_95 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_43, %rsqrt_43), kwargs = {}) | |
%_param_constant266 : [#users=1] = get_attr[target=_param_constant266] | |
%mul_96 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_95, %_param_constant266), kwargs = {}) | |
%_param_constant267 : [#users=1] = get_attr[target=_param_constant267] | |
%add_136 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_96, %_param_constant267), kwargs = {}) | |
%_param_constant268 : [#users=1] = get_attr[target=_param_constant268] | |
%t_84 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant268,), kwargs = {}) | |
%view_221 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_136, [128, 1280]), kwargs = {}) | |
%mm_36 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_221, %t_84), kwargs = {}) | |
%_unsafe_view_91 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_36, [2, 64, 1280]), kwargs = {}) | |
%view_222 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_91, [2, 64, 20, 64]), kwargs = {}) | |
%permute_61 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_222, [0, 2, 1, 3]), kwargs = {}) | |
%clone_54 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_61,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_92 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_54, [40, 64, 64]), kwargs = {}) | |
%_param_constant269 : [#users=1] = get_attr[target=_param_constant269] | |
%t_85 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant269,), kwargs = {}) | |
%view_223 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_136, [128, 1280]), kwargs = {}) | |
%mm_37 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_223, %t_85), kwargs = {}) | |
%_unsafe_view_93 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_37, [2, 64, 1280]), kwargs = {}) | |
%_param_constant270 : [#users=1] = get_attr[target=_param_constant270] | |
%t_86 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant270,), kwargs = {}) | |
%view_224 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_136, [128, 1280]), kwargs = {}) | |
%mm_38 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_224, %t_86), kwargs = {}) | |
%_unsafe_view_94 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_38, [2, 64, 1280]), kwargs = {}) | |
%view_225 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_93, [2, 64, 20, 64]), kwargs = {}) | |
%permute_62 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_225, [0, 2, 1, 3]), kwargs = {}) | |
%clone_55 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_62,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_95 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_55, [40, 64, 64]), kwargs = {}) | |
%view_226 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_94, [2, 64, 20, 64]), kwargs = {}) | |
%permute_63 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_226, [0, 2, 1, 3]), kwargs = {}) | |
%clone_56 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_63,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_96 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_56, [40, 64, 64]), kwargs = {}) | |
%empty_12 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 64, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_12 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_95, -1, -2), kwargs = {}) | |
%baddbmm_12 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_12, %_unsafe_view_92, %transpose_12), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_12 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_12, -1, False), kwargs = {}) | |
%detach_62 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_12,), kwargs = {}) | |
%bmm_19 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_12, %_unsafe_view_96), kwargs = {}) | |
%view_227 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_19, [2, 20, 64, 64]), kwargs = {}) | |
%permute_64 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_227, [0, 2, 1, 3]), kwargs = {}) | |
%clone_57 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_64,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_97 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_57, [2, 64, 1280]), kwargs = {}) | |
%_param_constant271 : [#users=1] = get_attr[target=_param_constant271] | |
%t_87 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant271,), kwargs = {}) | |
%view_228 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_97, [128, 1280]), kwargs = {}) | |
%_param_constant272 : [#users=1] = get_attr[target=_param_constant272] | |
%addmm_41 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant272, %view_228, %t_87), kwargs = {}) | |
%view_229 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_41, [2, 64, 1280]), kwargs = {}) | |
%add_137 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_229, %add_134), kwargs = {}) | |
%var_mean_44 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_137, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_88 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_44, 0), kwargs = {}) | |
%getitem_89 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_44, 1), kwargs = {}) | |
%add_138 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_88, 1e-05), kwargs = {}) | |
%rsqrt_44 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_138,), kwargs = {}) | |
%sub_44 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_137, %getitem_89), kwargs = {}) | |
%mul_97 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_44, %rsqrt_44), kwargs = {}) | |
%_param_constant273 : [#users=1] = get_attr[target=_param_constant273] | |
%mul_98 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_97, %_param_constant273), kwargs = {}) | |
%_param_constant274 : [#users=1] = get_attr[target=_param_constant274] | |
%add_139 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_98, %_param_constant274), kwargs = {}) | |
%_param_constant275 : [#users=1] = get_attr[target=_param_constant275] | |
%t_88 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant275,), kwargs = {}) | |
%view_230 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_139, [128, 1280]), kwargs = {}) | |
%mm_39 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_230, %t_88), kwargs = {}) | |
%_unsafe_view_98 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_39, [2, 64, 1280]), kwargs = {}) | |
%view_231 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_98, [2, 64, 20, 64]), kwargs = {}) | |
%permute_65 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_231, [0, 2, 1, 3]), kwargs = {}) | |
%clone_58 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_65,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_99 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_58, [40, 64, 64]), kwargs = {}) | |
%_param_constant276 : [#users=1] = get_attr[target=_param_constant276] | |
%t_89 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant276,), kwargs = {}) | |
%view_232 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_40 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_232, %t_89), kwargs = {}) | |
%_unsafe_view_100 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_40, [2, 64, 1280]), kwargs = {}) | |
%_param_constant277 : [#users=1] = get_attr[target=_param_constant277] | |
%t_90 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant277,), kwargs = {}) | |
%view_233 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_41 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_233, %t_90), kwargs = {}) | |
%_unsafe_view_101 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_41, [2, 64, 1280]), kwargs = {}) | |
%view_234 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_100, [2, 64, 20, 64]), kwargs = {}) | |
%permute_66 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_234, [0, 2, 1, 3]), kwargs = {}) | |
%clone_59 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_66,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_102 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_59, [40, 64, 64]), kwargs = {}) | |
%view_235 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_101, [2, 64, 20, 64]), kwargs = {}) | |
%permute_67 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_235, [0, 2, 1, 3]), kwargs = {}) | |
%clone_60 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_67,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_103 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_60, [40, 64, 64]), kwargs = {}) | |
%empty_13 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 64, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_13 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_102, -1, -2), kwargs = {}) | |
%baddbmm_13 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_13, %_unsafe_view_99, %transpose_13), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_13 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_13, -1, False), kwargs = {}) | |
%detach_63 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_13,), kwargs = {}) | |
%bmm_20 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_13, %_unsafe_view_103), kwargs = {}) | |
%view_236 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_20, [2, 20, 64, 64]), kwargs = {}) | |
%permute_68 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_236, [0, 2, 1, 3]), kwargs = {}) | |
%clone_61 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_68,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_104 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_61, [2, 64, 1280]), kwargs = {}) | |
%_param_constant278 : [#users=1] = get_attr[target=_param_constant278] | |
%t_91 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant278,), kwargs = {}) | |
%view_237 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_104, [128, 1280]), kwargs = {}) | |
%_param_constant279 : [#users=1] = get_attr[target=_param_constant279] | |
%addmm_42 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant279, %view_237, %t_91), kwargs = {}) | |
%view_238 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_42, [2, 64, 1280]), kwargs = {}) | |
%add_140 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_238, %add_137), kwargs = {}) | |
%var_mean_45 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_140, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_90 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_45, 0), kwargs = {}) | |
%getitem_91 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_45, 1), kwargs = {}) | |
%add_141 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_90, 1e-05), kwargs = {}) | |
%rsqrt_45 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_141,), kwargs = {}) | |
%sub_45 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_140, %getitem_91), kwargs = {}) | |
%mul_99 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_45, %rsqrt_45), kwargs = {}) | |
%_param_constant280 : [#users=1] = get_attr[target=_param_constant280] | |
%mul_100 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_99, %_param_constant280), kwargs = {}) | |
%_param_constant281 : [#users=1] = get_attr[target=_param_constant281] | |
%add_142 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_100, %_param_constant281), kwargs = {}) | |
%_param_constant282 : [#users=1] = get_attr[target=_param_constant282] | |
%t_92 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant282,), kwargs = {}) | |
%view_239 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_142, [128, 1280]), kwargs = {}) | |
%_param_constant283 : [#users=1] = get_attr[target=_param_constant283] | |
%addmm_43 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant283, %view_239, %t_92), kwargs = {}) | |
%view_240 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_43, [2, 64, 10240]), kwargs = {}) | |
%slice_37 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_240, -1, 0, 5120), kwargs = {}) | |
%slice_38 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_240, -1, 5120, 10240), kwargs = {}) | |
%gelu_6 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_38,), kwargs = {}) | |
%mul_101 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_37, %gelu_6), kwargs = {}) | |
%_param_constant284 : [#users=1] = get_attr[target=_param_constant284] | |
%t_93 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant284,), kwargs = {}) | |
%view_241 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_101, [128, 5120]), kwargs = {}) | |
%_param_constant285 : [#users=1] = get_attr[target=_param_constant285] | |
%addmm_44 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant285, %view_241, %t_93), kwargs = {}) | |
%view_242 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_44, [2, 64, 1280]), kwargs = {}) | |
%add_143 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_242, %add_140), kwargs = {}) | |
%_param_constant286 : [#users=1] = get_attr[target=_param_constant286] | |
%t_94 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant286,), kwargs = {}) | |
%view_243 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_143, [128, 1280]), kwargs = {}) | |
%_param_constant287 : [#users=1] = get_attr[target=_param_constant287] | |
%addmm_45 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant287, %view_243, %t_94), kwargs = {}) | |
%view_244 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_45, [2, 64, 1280]), kwargs = {}) | |
%view_245 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_244, [2, 8, 8, 1280]), kwargs = {}) | |
%permute_69 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_245, [0, 3, 1, 2]), kwargs = {}) | |
%clone_62 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_69,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_144 : [#users=2] = call_function[target=torch.ops.aten.add](args = (%clone_62, %div_9), kwargs = {}) | |
%view_246 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_144, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_46 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_246, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_92 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_46, 0), kwargs = {}) | |
%getitem_93 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_46, 1), kwargs = {}) | |
%add_145 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_92, 1e-05), kwargs = {}) | |
%rsqrt_46 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_145,), kwargs = {}) | |
%sub_46 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_246, %getitem_93), kwargs = {}) | |
%mul_102 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_46, %rsqrt_46), kwargs = {}) | |
%view_247 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_102, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant288 : [#users=1] = get_attr[target=_param_constant288] | |
%unsqueeze_170 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant288, 0), kwargs = {}) | |
%unsqueeze_171 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_170, 2), kwargs = {}) | |
%unsqueeze_172 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_171, 3), kwargs = {}) | |
%_param_constant289 : [#users=1] = get_attr[target=_param_constant289] | |
%unsqueeze_173 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant289, 0), kwargs = {}) | |
%unsqueeze_174 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_173, 2), kwargs = {}) | |
%unsqueeze_175 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_174, 3), kwargs = {}) | |
%mul_103 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_247, %unsqueeze_175), kwargs = {}) | |
%add_146 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_103, %unsqueeze_172), kwargs = {}) | |
%squeeze_100 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_93, 3), kwargs = {}) | |
%squeeze_101 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_100, 2), kwargs = {}) | |
%squeeze_102 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_46, 3), kwargs = {}) | |
%squeeze_103 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_102, 2), kwargs = {}) | |
%detach_64 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_101,), kwargs = {}) | |
%detach_65 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_103,), kwargs = {}) | |
%silu_28 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_146,), kwargs = {}) | |
%_param_constant290 : [#users=1] = get_attr[target=_param_constant290] | |
%_param_constant291 : [#users=1] = get_attr[target=_param_constant291] | |
%convolution_24 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_28, %_param_constant290, %_param_constant291, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_29 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant292 : [#users=1] = get_attr[target=_param_constant292] | |
%t_95 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant292,), kwargs = {}) | |
%_param_constant293 : [#users=1] = get_attr[target=_param_constant293] | |
%addmm_46 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant293, %silu_29, %t_95), kwargs = {}) | |
%slice_39 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_46, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_40 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_39, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_176 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_40, 2), kwargs = {}) | |
%unsqueeze_177 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_176, 3), kwargs = {}) | |
%add_147 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_24, %unsqueeze_177), kwargs = {}) | |
%view_248 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_147, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_47 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_248, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_94 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_47, 0), kwargs = {}) | |
%getitem_95 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_47, 1), kwargs = {}) | |
%add_148 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_94, 1e-05), kwargs = {}) | |
%rsqrt_47 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_148,), kwargs = {}) | |
%sub_47 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_248, %getitem_95), kwargs = {}) | |
%mul_104 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_47, %rsqrt_47), kwargs = {}) | |
%view_249 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_104, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant294 : [#users=1] = get_attr[target=_param_constant294] | |
%unsqueeze_178 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant294, 0), kwargs = {}) | |
%unsqueeze_179 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_178, 2), kwargs = {}) | |
%unsqueeze_180 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_179, 3), kwargs = {}) | |
%_param_constant295 : [#users=1] = get_attr[target=_param_constant295] | |
%unsqueeze_181 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant295, 0), kwargs = {}) | |
%unsqueeze_182 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_181, 2), kwargs = {}) | |
%unsqueeze_183 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_182, 3), kwargs = {}) | |
%mul_105 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_249, %unsqueeze_183), kwargs = {}) | |
%add_149 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_105, %unsqueeze_180), kwargs = {}) | |
%squeeze_104 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_95, 3), kwargs = {}) | |
%squeeze_105 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_104, 2), kwargs = {}) | |
%squeeze_106 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_47, 3), kwargs = {}) | |
%squeeze_107 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_106, 2), kwargs = {}) | |
%detach_66 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_105,), kwargs = {}) | |
%detach_67 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_107,), kwargs = {}) | |
%silu_30 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_149,), kwargs = {}) | |
%_param_constant296 : [#users=1] = get_attr[target=_param_constant296] | |
%_param_constant297 : [#users=1] = get_attr[target=_param_constant297] | |
%convolution_25 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_30, %_param_constant296, %_param_constant297, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_150 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%add_144, %convolution_25), kwargs = {}) | |
%div_10 : [#users=1] = call_function[target=torch.ops.aten.div](args = (%add_150, 1), kwargs = {}) | |
%cat_3 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%div_10, %div_8], 1), kwargs = {}) | |
%view_250 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_3, [2, 32, 80, 64]), kwargs = {}) | |
%var_mean_48 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_250, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_96 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_48, 0), kwargs = {}) | |
%getitem_97 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_48, 1), kwargs = {}) | |
%add_151 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_96, 1e-05), kwargs = {}) | |
%rsqrt_48 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_151,), kwargs = {}) | |
%sub_48 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_250, %getitem_97), kwargs = {}) | |
%mul_106 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_48, %rsqrt_48), kwargs = {}) | |
%view_251 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_106, [2, 2560, 8, 8]), kwargs = {}) | |
%_param_constant298 : [#users=1] = get_attr[target=_param_constant298] | |
%unsqueeze_184 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant298, 0), kwargs = {}) | |
%unsqueeze_185 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_184, 2), kwargs = {}) | |
%unsqueeze_186 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_185, 3), kwargs = {}) | |
%_param_constant299 : [#users=1] = get_attr[target=_param_constant299] | |
%unsqueeze_187 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant299, 0), kwargs = {}) | |
%unsqueeze_188 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_187, 2), kwargs = {}) | |
%unsqueeze_189 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_188, 3), kwargs = {}) | |
%mul_107 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_251, %unsqueeze_189), kwargs = {}) | |
%add_152 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_107, %unsqueeze_186), kwargs = {}) | |
%squeeze_108 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_97, 3), kwargs = {}) | |
%squeeze_109 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_108, 2), kwargs = {}) | |
%squeeze_110 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_48, 3), kwargs = {}) | |
%squeeze_111 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_110, 2), kwargs = {}) | |
%detach_68 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_109,), kwargs = {}) | |
%detach_69 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_111,), kwargs = {}) | |
%silu_31 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_152,), kwargs = {}) | |
%_param_constant300 : [#users=1] = get_attr[target=_param_constant300] | |
%_param_constant301 : [#users=1] = get_attr[target=_param_constant301] | |
%convolution_26 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_31, %_param_constant300, %_param_constant301, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_32 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant302 : [#users=1] = get_attr[target=_param_constant302] | |
%t_96 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant302,), kwargs = {}) | |
%_param_constant303 : [#users=1] = get_attr[target=_param_constant303] | |
%addmm_47 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant303, %silu_32, %t_96), kwargs = {}) | |
%slice_41 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_47, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_42 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_41, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_190 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_42, 2), kwargs = {}) | |
%unsqueeze_191 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_190, 3), kwargs = {}) | |
%add_153 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_26, %unsqueeze_191), kwargs = {}) | |
%view_252 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_153, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_49 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_252, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_98 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_49, 0), kwargs = {}) | |
%getitem_99 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_49, 1), kwargs = {}) | |
%add_154 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_98, 1e-05), kwargs = {}) | |
%rsqrt_49 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_154,), kwargs = {}) | |
%sub_49 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_252, %getitem_99), kwargs = {}) | |
%mul_108 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_49, %rsqrt_49), kwargs = {}) | |
%view_253 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_108, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant304 : [#users=1] = get_attr[target=_param_constant304] | |
%unsqueeze_192 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant304, 0), kwargs = {}) | |
%unsqueeze_193 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_192, 2), kwargs = {}) | |
%unsqueeze_194 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_193, 3), kwargs = {}) | |
%_param_constant305 : [#users=1] = get_attr[target=_param_constant305] | |
%unsqueeze_195 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant305, 0), kwargs = {}) | |
%unsqueeze_196 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_195, 2), kwargs = {}) | |
%unsqueeze_197 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_196, 3), kwargs = {}) | |
%mul_109 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_253, %unsqueeze_197), kwargs = {}) | |
%add_155 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_109, %unsqueeze_194), kwargs = {}) | |
%squeeze_112 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_99, 3), kwargs = {}) | |
%squeeze_113 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_112, 2), kwargs = {}) | |
%squeeze_114 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_49, 3), kwargs = {}) | |
%squeeze_115 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_114, 2), kwargs = {}) | |
%detach_70 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_113,), kwargs = {}) | |
%detach_71 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_115,), kwargs = {}) | |
%silu_33 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_155,), kwargs = {}) | |
%_param_constant306 : [#users=1] = get_attr[target=_param_constant306] | |
%_param_constant307 : [#users=1] = get_attr[target=_param_constant307] | |
%convolution_27 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_33, %_param_constant306, %_param_constant307, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant308 : [#users=1] = get_attr[target=_param_constant308] | |
%_param_constant309 : [#users=1] = get_attr[target=_param_constant309] | |
%convolution_28 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_3, %_param_constant308, %_param_constant309, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_156 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_28, %convolution_27), kwargs = {}) | |
%div_11 : [#users=1] = call_function[target=torch.ops.aten.div](args = (%add_156, 1.0), kwargs = {}) | |
%cat_4 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%div_11, %div_7], 1), kwargs = {}) | |
%view_254 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_4, [2, 32, 80, 64]), kwargs = {}) | |
%var_mean_50 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_254, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_100 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_50, 0), kwargs = {}) | |
%getitem_101 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_50, 1), kwargs = {}) | |
%add_157 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_100, 1e-05), kwargs = {}) | |
%rsqrt_50 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_157,), kwargs = {}) | |
%sub_50 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_254, %getitem_101), kwargs = {}) | |
%mul_110 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_50, %rsqrt_50), kwargs = {}) | |
%view_255 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_110, [2, 2560, 8, 8]), kwargs = {}) | |
%_param_constant310 : [#users=1] = get_attr[target=_param_constant310] | |
%unsqueeze_198 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant310, 0), kwargs = {}) | |
%unsqueeze_199 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_198, 2), kwargs = {}) | |
%unsqueeze_200 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_199, 3), kwargs = {}) | |
%_param_constant311 : [#users=1] = get_attr[target=_param_constant311] | |
%unsqueeze_201 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant311, 0), kwargs = {}) | |
%unsqueeze_202 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_201, 2), kwargs = {}) | |
%unsqueeze_203 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_202, 3), kwargs = {}) | |
%mul_111 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_255, %unsqueeze_203), kwargs = {}) | |
%add_158 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_111, %unsqueeze_200), kwargs = {}) | |
%squeeze_116 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_101, 3), kwargs = {}) | |
%squeeze_117 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_116, 2), kwargs = {}) | |
%squeeze_118 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_50, 3), kwargs = {}) | |
%squeeze_119 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_118, 2), kwargs = {}) | |
%detach_72 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_117,), kwargs = {}) | |
%detach_73 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_119,), kwargs = {}) | |
%silu_34 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_158,), kwargs = {}) | |
%_param_constant312 : [#users=1] = get_attr[target=_param_constant312] | |
%_param_constant313 : [#users=1] = get_attr[target=_param_constant313] | |
%convolution_29 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_34, %_param_constant312, %_param_constant313, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_35 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant314 : [#users=1] = get_attr[target=_param_constant314] | |
%t_97 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant314,), kwargs = {}) | |
%_param_constant315 : [#users=1] = get_attr[target=_param_constant315] | |
%addmm_48 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant315, %silu_35, %t_97), kwargs = {}) | |
%slice_43 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_48, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_44 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_43, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_204 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_44, 2), kwargs = {}) | |
%unsqueeze_205 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_204, 3), kwargs = {}) | |
%add_159 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_29, %unsqueeze_205), kwargs = {}) | |
%view_256 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_159, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_51 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_256, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_102 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_51, 0), kwargs = {}) | |
%getitem_103 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_51, 1), kwargs = {}) | |
%add_160 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_102, 1e-05), kwargs = {}) | |
%rsqrt_51 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_160,), kwargs = {}) | |
%sub_51 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_256, %getitem_103), kwargs = {}) | |
%mul_112 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_51, %rsqrt_51), kwargs = {}) | |
%view_257 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_112, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant316 : [#users=1] = get_attr[target=_param_constant316] | |
%unsqueeze_206 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant316, 0), kwargs = {}) | |
%unsqueeze_207 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_206, 2), kwargs = {}) | |
%unsqueeze_208 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_207, 3), kwargs = {}) | |
%_param_constant317 : [#users=1] = get_attr[target=_param_constant317] | |
%unsqueeze_209 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant317, 0), kwargs = {}) | |
%unsqueeze_210 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_209, 2), kwargs = {}) | |
%unsqueeze_211 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_210, 3), kwargs = {}) | |
%mul_113 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_257, %unsqueeze_211), kwargs = {}) | |
%add_161 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_113, %unsqueeze_208), kwargs = {}) | |
%squeeze_120 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_103, 3), kwargs = {}) | |
%squeeze_121 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_120, 2), kwargs = {}) | |
%squeeze_122 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_51, 3), kwargs = {}) | |
%squeeze_123 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_122, 2), kwargs = {}) | |
%detach_74 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_121,), kwargs = {}) | |
%detach_75 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_123,), kwargs = {}) | |
%silu_36 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_161,), kwargs = {}) | |
%_param_constant318 : [#users=1] = get_attr[target=_param_constant318] | |
%_param_constant319 : [#users=1] = get_attr[target=_param_constant319] | |
%convolution_30 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_36, %_param_constant318, %_param_constant319, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant320 : [#users=1] = get_attr[target=_param_constant320] | |
%_param_constant321 : [#users=1] = get_attr[target=_param_constant321] | |
%convolution_31 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_4, %_param_constant320, %_param_constant321, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_162 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_31, %convolution_30), kwargs = {}) | |
%div_12 : [#users=1] = call_function[target=torch.ops.aten.div](args = (%add_162, 1.0), kwargs = {}) | |
%cat_5 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%div_12, %convolution_17], 1), kwargs = {}) | |
%view_258 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_5, [2, 32, 80, 64]), kwargs = {}) | |
%var_mean_52 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_258, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_104 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_52, 0), kwargs = {}) | |
%getitem_105 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_52, 1), kwargs = {}) | |
%add_163 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_104, 1e-05), kwargs = {}) | |
%rsqrt_52 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_163,), kwargs = {}) | |
%sub_52 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_258, %getitem_105), kwargs = {}) | |
%mul_114 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_52, %rsqrt_52), kwargs = {}) | |
%view_259 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_114, [2, 2560, 8, 8]), kwargs = {}) | |
%_param_constant322 : [#users=1] = get_attr[target=_param_constant322] | |
%unsqueeze_212 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant322, 0), kwargs = {}) | |
%unsqueeze_213 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_212, 2), kwargs = {}) | |
%unsqueeze_214 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_213, 3), kwargs = {}) | |
%_param_constant323 : [#users=1] = get_attr[target=_param_constant323] | |
%unsqueeze_215 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant323, 0), kwargs = {}) | |
%unsqueeze_216 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_215, 2), kwargs = {}) | |
%unsqueeze_217 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_216, 3), kwargs = {}) | |
%mul_115 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_259, %unsqueeze_217), kwargs = {}) | |
%add_164 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_115, %unsqueeze_214), kwargs = {}) | |
%squeeze_124 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_105, 3), kwargs = {}) | |
%squeeze_125 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_124, 2), kwargs = {}) | |
%squeeze_126 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_52, 3), kwargs = {}) | |
%squeeze_127 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_126, 2), kwargs = {}) | |
%detach_76 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_125,), kwargs = {}) | |
%detach_77 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_127,), kwargs = {}) | |
%silu_37 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_164,), kwargs = {}) | |
%_param_constant324 : [#users=1] = get_attr[target=_param_constant324] | |
%_param_constant325 : [#users=1] = get_attr[target=_param_constant325] | |
%convolution_32 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_37, %_param_constant324, %_param_constant325, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_38 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant326 : [#users=1] = get_attr[target=_param_constant326] | |
%t_98 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant326,), kwargs = {}) | |
%_param_constant327 : [#users=1] = get_attr[target=_param_constant327] | |
%addmm_49 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant327, %silu_38, %t_98), kwargs = {}) | |
%slice_45 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_49, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_46 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_45, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_218 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_46, 2), kwargs = {}) | |
%unsqueeze_219 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_218, 3), kwargs = {}) | |
%add_165 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_32, %unsqueeze_219), kwargs = {}) | |
%view_260 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_165, [2, 32, 40, 64]), kwargs = {}) | |
%var_mean_53 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_260, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_106 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_53, 0), kwargs = {}) | |
%getitem_107 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_53, 1), kwargs = {}) | |
%add_166 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_106, 1e-05), kwargs = {}) | |
%rsqrt_53 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_166,), kwargs = {}) | |
%sub_53 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_260, %getitem_107), kwargs = {}) | |
%mul_116 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_53, %rsqrt_53), kwargs = {}) | |
%view_261 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_116, [2, 1280, 8, 8]), kwargs = {}) | |
%_param_constant328 : [#users=1] = get_attr[target=_param_constant328] | |
%unsqueeze_220 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant328, 0), kwargs = {}) | |
%unsqueeze_221 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_220, 2), kwargs = {}) | |
%unsqueeze_222 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_221, 3), kwargs = {}) | |
%_param_constant329 : [#users=1] = get_attr[target=_param_constant329] | |
%unsqueeze_223 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant329, 0), kwargs = {}) | |
%unsqueeze_224 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_223, 2), kwargs = {}) | |
%unsqueeze_225 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_224, 3), kwargs = {}) | |
%mul_117 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_261, %unsqueeze_225), kwargs = {}) | |
%add_167 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_117, %unsqueeze_222), kwargs = {}) | |
%squeeze_128 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_107, 3), kwargs = {}) | |
%squeeze_129 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_128, 2), kwargs = {}) | |
%squeeze_130 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_53, 3), kwargs = {}) | |
%squeeze_131 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_130, 2), kwargs = {}) | |
%detach_78 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_129,), kwargs = {}) | |
%detach_79 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_131,), kwargs = {}) | |
%silu_39 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_167,), kwargs = {}) | |
%_param_constant330 : [#users=1] = get_attr[target=_param_constant330] | |
%_param_constant331 : [#users=1] = get_attr[target=_param_constant331] | |
%convolution_33 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_39, %_param_constant330, %_param_constant331, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant332 : [#users=1] = get_attr[target=_param_constant332] | |
%_param_constant333 : [#users=1] = get_attr[target=_param_constant333] | |
%convolution_34 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_5, %_param_constant332, %_param_constant333, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_168 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_34, %convolution_33), kwargs = {}) | |
%div_13 : [#users=1] = call_function[target=torch.ops.aten.div](args = (%add_168, 1.0), kwargs = {}) | |
%upsample_nearest2d : [#users=1] = call_function[target=torch.ops.aten.upsample_nearest2d](args = (%div_13, [16, 16], 2.0, 2.0), kwargs = {}) | |
%_param_constant334 : [#users=1] = get_attr[target=_param_constant334] | |
%_param_constant335 : [#users=1] = get_attr[target=_param_constant335] | |
%convolution_35 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%upsample_nearest2d, %_param_constant334, %_param_constant335, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%cat_6 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%convolution_35, %add_113], 1), kwargs = {}) | |
%view_262 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_6, [2, 32, 80, 256]), kwargs = {}) | |
%var_mean_54 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_262, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_108 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_54, 0), kwargs = {}) | |
%getitem_109 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_54, 1), kwargs = {}) | |
%add_169 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_108, 1e-05), kwargs = {}) | |
%rsqrt_54 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_169,), kwargs = {}) | |
%sub_54 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_262, %getitem_109), kwargs = {}) | |
%mul_118 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_54, %rsqrt_54), kwargs = {}) | |
%view_263 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_118, [2, 2560, 16, 16]), kwargs = {}) | |
%_param_constant336 : [#users=1] = get_attr[target=_param_constant336] | |
%unsqueeze_226 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant336, 0), kwargs = {}) | |
%unsqueeze_227 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_226, 2), kwargs = {}) | |
%unsqueeze_228 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_227, 3), kwargs = {}) | |
%_param_constant337 : [#users=1] = get_attr[target=_param_constant337] | |
%unsqueeze_229 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant337, 0), kwargs = {}) | |
%unsqueeze_230 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_229, 2), kwargs = {}) | |
%unsqueeze_231 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_230, 3), kwargs = {}) | |
%mul_119 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_263, %unsqueeze_231), kwargs = {}) | |
%add_170 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_119, %unsqueeze_228), kwargs = {}) | |
%squeeze_132 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_109, 3), kwargs = {}) | |
%squeeze_133 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_132, 2), kwargs = {}) | |
%squeeze_134 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_54, 3), kwargs = {}) | |
%squeeze_135 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_134, 2), kwargs = {}) | |
%detach_80 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_133,), kwargs = {}) | |
%detach_81 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_135,), kwargs = {}) | |
%silu_40 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_170,), kwargs = {}) | |
%_param_constant338 : [#users=1] = get_attr[target=_param_constant338] | |
%_param_constant339 : [#users=1] = get_attr[target=_param_constant339] | |
%convolution_36 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_40, %_param_constant338, %_param_constant339, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_41 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant340 : [#users=1] = get_attr[target=_param_constant340] | |
%t_99 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant340,), kwargs = {}) | |
%_param_constant341 : [#users=1] = get_attr[target=_param_constant341] | |
%addmm_50 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant341, %silu_41, %t_99), kwargs = {}) | |
%slice_47 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_50, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_48 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_47, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_232 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_48, 2), kwargs = {}) | |
%unsqueeze_233 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_232, 3), kwargs = {}) | |
%add_171 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_36, %unsqueeze_233), kwargs = {}) | |
%view_264 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_171, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_55 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_264, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_110 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_55, 0), kwargs = {}) | |
%getitem_111 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_55, 1), kwargs = {}) | |
%add_172 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_110, 1e-05), kwargs = {}) | |
%rsqrt_55 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_172,), kwargs = {}) | |
%sub_55 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_264, %getitem_111), kwargs = {}) | |
%mul_120 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_55, %rsqrt_55), kwargs = {}) | |
%view_265 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_120, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant342 : [#users=1] = get_attr[target=_param_constant342] | |
%unsqueeze_234 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant342, 0), kwargs = {}) | |
%unsqueeze_235 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_234, 2), kwargs = {}) | |
%unsqueeze_236 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_235, 3), kwargs = {}) | |
%_param_constant343 : [#users=1] = get_attr[target=_param_constant343] | |
%unsqueeze_237 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant343, 0), kwargs = {}) | |
%unsqueeze_238 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_237, 2), kwargs = {}) | |
%unsqueeze_239 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_238, 3), kwargs = {}) | |
%mul_121 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_265, %unsqueeze_239), kwargs = {}) | |
%add_173 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_121, %unsqueeze_236), kwargs = {}) | |
%squeeze_136 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_111, 3), kwargs = {}) | |
%squeeze_137 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_136, 2), kwargs = {}) | |
%squeeze_138 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_55, 3), kwargs = {}) | |
%squeeze_139 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_138, 2), kwargs = {}) | |
%detach_82 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_137,), kwargs = {}) | |
%detach_83 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_139,), kwargs = {}) | |
%silu_42 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_173,), kwargs = {}) | |
%_param_constant344 : [#users=1] = get_attr[target=_param_constant344] | |
%_param_constant345 : [#users=1] = get_attr[target=_param_constant345] | |
%convolution_37 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_42, %_param_constant344, %_param_constant345, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant346 : [#users=1] = get_attr[target=_param_constant346] | |
%_param_constant347 : [#users=1] = get_attr[target=_param_constant347] | |
%convolution_38 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_6, %_param_constant346, %_param_constant347, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_174 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_38, %convolution_37), kwargs = {}) | |
%div_14 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_174, 1.0), kwargs = {}) | |
%view_266 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_14, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_56 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_266, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_112 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_56, 0), kwargs = {}) | |
%getitem_113 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_56, 1), kwargs = {}) | |
%add_175 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_112, 1e-06), kwargs = {}) | |
%rsqrt_56 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_175,), kwargs = {}) | |
%sub_56 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_266, %getitem_113), kwargs = {}) | |
%mul_122 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_56, %rsqrt_56), kwargs = {}) | |
%view_267 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_122, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant348 : [#users=1] = get_attr[target=_param_constant348] | |
%unsqueeze_240 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant348, 0), kwargs = {}) | |
%unsqueeze_241 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_240, 2), kwargs = {}) | |
%unsqueeze_242 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_241, 3), kwargs = {}) | |
%_param_constant349 : [#users=1] = get_attr[target=_param_constant349] | |
%unsqueeze_243 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant349, 0), kwargs = {}) | |
%unsqueeze_244 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_243, 2), kwargs = {}) | |
%unsqueeze_245 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_244, 3), kwargs = {}) | |
%mul_123 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_267, %unsqueeze_245), kwargs = {}) | |
%add_176 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_123, %unsqueeze_242), kwargs = {}) | |
%squeeze_140 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_113, 3), kwargs = {}) | |
%squeeze_141 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_140, 2), kwargs = {}) | |
%squeeze_142 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_56, 3), kwargs = {}) | |
%squeeze_143 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_142, 2), kwargs = {}) | |
%detach_84 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_141,), kwargs = {}) | |
%detach_85 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_143,), kwargs = {}) | |
%permute_70 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_176, [0, 2, 3, 1]), kwargs = {}) | |
%view_268 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_70, [2, 256, 1280]), kwargs = {}) | |
%_param_constant350 : [#users=1] = get_attr[target=_param_constant350] | |
%t_100 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant350,), kwargs = {}) | |
%expand_15 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_268, [2, 256, 1280]), kwargs = {}) | |
%view_269 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_15, [2, 256, 1280]), kwargs = {}) | |
%expand_16 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_100, [2, 1280, 1280]), kwargs = {}) | |
%view_270 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_16, [2, 1280, 1280]), kwargs = {}) | |
%bmm_21 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_269, %view_270), kwargs = {}) | |
%_unsafe_view_105 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_21, [2, 256, 1280]), kwargs = {}) | |
%_param_constant351 : [#users=1] = get_attr[target=_param_constant351] | |
%add_177 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_105, %_param_constant351), kwargs = {}) | |
%var_mean_57 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_177, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_114 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_57, 0), kwargs = {}) | |
%getitem_115 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_57, 1), kwargs = {}) | |
%add_178 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_114, 1e-05), kwargs = {}) | |
%rsqrt_57 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_178,), kwargs = {}) | |
%sub_57 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_177, %getitem_115), kwargs = {}) | |
%mul_124 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_57, %rsqrt_57), kwargs = {}) | |
%_param_constant352 : [#users=1] = get_attr[target=_param_constant352] | |
%mul_125 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_124, %_param_constant352), kwargs = {}) | |
%_param_constant353 : [#users=1] = get_attr[target=_param_constant353] | |
%add_179 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_125, %_param_constant353), kwargs = {}) | |
%_param_constant354 : [#users=1] = get_attr[target=_param_constant354] | |
%t_101 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant354,), kwargs = {}) | |
%view_271 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_179, [512, 1280]), kwargs = {}) | |
%mm_42 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_271, %t_101), kwargs = {}) | |
%_unsafe_view_106 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_42, [2, 256, 1280]), kwargs = {}) | |
%view_272 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_106, [2, 256, 20, 64]), kwargs = {}) | |
%permute_71 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_272, [0, 2, 1, 3]), kwargs = {}) | |
%clone_63 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_71,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_107 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_63, [40, 256, 64]), kwargs = {}) | |
%_param_constant355 : [#users=1] = get_attr[target=_param_constant355] | |
%t_102 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant355,), kwargs = {}) | |
%view_273 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_179, [512, 1280]), kwargs = {}) | |
%mm_43 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_273, %t_102), kwargs = {}) | |
%_unsafe_view_108 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_43, [2, 256, 1280]), kwargs = {}) | |
%_param_constant356 : [#users=1] = get_attr[target=_param_constant356] | |
%t_103 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant356,), kwargs = {}) | |
%view_274 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_179, [512, 1280]), kwargs = {}) | |
%mm_44 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_274, %t_103), kwargs = {}) | |
%_unsafe_view_109 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_44, [2, 256, 1280]), kwargs = {}) | |
%view_275 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_108, [2, 256, 20, 64]), kwargs = {}) | |
%permute_72 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_275, [0, 2, 1, 3]), kwargs = {}) | |
%clone_64 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_72,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_110 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_64, [40, 256, 64]), kwargs = {}) | |
%view_276 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_109, [2, 256, 20, 64]), kwargs = {}) | |
%permute_73 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_276, [0, 2, 1, 3]), kwargs = {}) | |
%clone_65 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_73,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_111 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_65, [40, 256, 64]), kwargs = {}) | |
%empty_14 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 256, 256],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_14 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_110, -1, -2), kwargs = {}) | |
%baddbmm_14 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_14, %_unsafe_view_107, %transpose_14), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_14 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_14, -1, False), kwargs = {}) | |
%detach_86 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_14,), kwargs = {}) | |
%bmm_22 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_14, %_unsafe_view_111), kwargs = {}) | |
%view_277 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_22, [2, 20, 256, 64]), kwargs = {}) | |
%permute_74 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_277, [0, 2, 1, 3]), kwargs = {}) | |
%clone_66 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_74,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_112 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_66, [2, 256, 1280]), kwargs = {}) | |
%_param_constant357 : [#users=1] = get_attr[target=_param_constant357] | |
%t_104 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant357,), kwargs = {}) | |
%view_278 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_112, [512, 1280]), kwargs = {}) | |
%_param_constant358 : [#users=1] = get_attr[target=_param_constant358] | |
%addmm_51 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant358, %view_278, %t_104), kwargs = {}) | |
%view_279 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_51, [2, 256, 1280]), kwargs = {}) | |
%add_180 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_279, %add_177), kwargs = {}) | |
%var_mean_58 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_180, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_116 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_58, 0), kwargs = {}) | |
%getitem_117 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_58, 1), kwargs = {}) | |
%add_181 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_116, 1e-05), kwargs = {}) | |
%rsqrt_58 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_181,), kwargs = {}) | |
%sub_58 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_180, %getitem_117), kwargs = {}) | |
%mul_126 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_58, %rsqrt_58), kwargs = {}) | |
%_param_constant359 : [#users=1] = get_attr[target=_param_constant359] | |
%mul_127 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_126, %_param_constant359), kwargs = {}) | |
%_param_constant360 : [#users=1] = get_attr[target=_param_constant360] | |
%add_182 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_127, %_param_constant360), kwargs = {}) | |
%_param_constant361 : [#users=1] = get_attr[target=_param_constant361] | |
%t_105 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant361,), kwargs = {}) | |
%view_280 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_182, [512, 1280]), kwargs = {}) | |
%mm_45 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_280, %t_105), kwargs = {}) | |
%_unsafe_view_113 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_45, [2, 256, 1280]), kwargs = {}) | |
%view_281 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_113, [2, 256, 20, 64]), kwargs = {}) | |
%permute_75 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_281, [0, 2, 1, 3]), kwargs = {}) | |
%clone_67 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_75,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_114 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_67, [40, 256, 64]), kwargs = {}) | |
%_param_constant362 : [#users=1] = get_attr[target=_param_constant362] | |
%t_106 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant362,), kwargs = {}) | |
%view_282 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_46 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_282, %t_106), kwargs = {}) | |
%_unsafe_view_115 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_46, [2, 64, 1280]), kwargs = {}) | |
%_param_constant363 : [#users=1] = get_attr[target=_param_constant363] | |
%t_107 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant363,), kwargs = {}) | |
%view_283 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_47 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_283, %t_107), kwargs = {}) | |
%_unsafe_view_116 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_47, [2, 64, 1280]), kwargs = {}) | |
%view_284 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_115, [2, 64, 20, 64]), kwargs = {}) | |
%permute_76 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_284, [0, 2, 1, 3]), kwargs = {}) | |
%clone_68 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_76,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_117 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_68, [40, 64, 64]), kwargs = {}) | |
%view_285 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_116, [2, 64, 20, 64]), kwargs = {}) | |
%permute_77 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_285, [0, 2, 1, 3]), kwargs = {}) | |
%clone_69 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_77,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_118 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_69, [40, 64, 64]), kwargs = {}) | |
%empty_15 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 256, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_15 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_117, -1, -2), kwargs = {}) | |
%baddbmm_15 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_15, %_unsafe_view_114, %transpose_15), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_15 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_15, -1, False), kwargs = {}) | |
%detach_87 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_15,), kwargs = {}) | |
%bmm_23 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_15, %_unsafe_view_118), kwargs = {}) | |
%view_286 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_23, [2, 20, 256, 64]), kwargs = {}) | |
%permute_78 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_286, [0, 2, 1, 3]), kwargs = {}) | |
%clone_70 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_78,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_119 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_70, [2, 256, 1280]), kwargs = {}) | |
%_param_constant364 : [#users=1] = get_attr[target=_param_constant364] | |
%t_108 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant364,), kwargs = {}) | |
%view_287 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_119, [512, 1280]), kwargs = {}) | |
%_param_constant365 : [#users=1] = get_attr[target=_param_constant365] | |
%addmm_52 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant365, %view_287, %t_108), kwargs = {}) | |
%view_288 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_52, [2, 256, 1280]), kwargs = {}) | |
%add_183 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_288, %add_180), kwargs = {}) | |
%var_mean_59 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_183, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_118 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_59, 0), kwargs = {}) | |
%getitem_119 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_59, 1), kwargs = {}) | |
%add_184 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_118, 1e-05), kwargs = {}) | |
%rsqrt_59 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_184,), kwargs = {}) | |
%sub_59 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_183, %getitem_119), kwargs = {}) | |
%mul_128 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_59, %rsqrt_59), kwargs = {}) | |
%_param_constant366 : [#users=1] = get_attr[target=_param_constant366] | |
%mul_129 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_128, %_param_constant366), kwargs = {}) | |
%_param_constant367 : [#users=1] = get_attr[target=_param_constant367] | |
%add_185 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_129, %_param_constant367), kwargs = {}) | |
%_param_constant368 : [#users=1] = get_attr[target=_param_constant368] | |
%t_109 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant368,), kwargs = {}) | |
%view_289 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_185, [512, 1280]), kwargs = {}) | |
%_param_constant369 : [#users=1] = get_attr[target=_param_constant369] | |
%addmm_53 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant369, %view_289, %t_109), kwargs = {}) | |
%view_290 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_53, [2, 256, 10240]), kwargs = {}) | |
%slice_49 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_290, -1, 0, 5120), kwargs = {}) | |
%slice_50 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_290, -1, 5120, 10240), kwargs = {}) | |
%gelu_7 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_50,), kwargs = {}) | |
%mul_130 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_49, %gelu_7), kwargs = {}) | |
%_param_constant370 : [#users=1] = get_attr[target=_param_constant370] | |
%t_110 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant370,), kwargs = {}) | |
%view_291 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_130, [512, 5120]), kwargs = {}) | |
%_param_constant371 : [#users=1] = get_attr[target=_param_constant371] | |
%addmm_54 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant371, %view_291, %t_110), kwargs = {}) | |
%view_292 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_54, [2, 256, 1280]), kwargs = {}) | |
%add_186 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_292, %add_183), kwargs = {}) | |
%_param_constant372 : [#users=1] = get_attr[target=_param_constant372] | |
%t_111 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant372,), kwargs = {}) | |
%view_293 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_186, [512, 1280]), kwargs = {}) | |
%_param_constant373 : [#users=1] = get_attr[target=_param_constant373] | |
%addmm_55 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant373, %view_293, %t_111), kwargs = {}) | |
%view_294 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_55, [2, 256, 1280]), kwargs = {}) | |
%view_295 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_294, [2, 16, 16, 1280]), kwargs = {}) | |
%permute_79 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_295, [0, 3, 1, 2]), kwargs = {}) | |
%clone_71 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_79,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_187 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%clone_71, %div_14), kwargs = {}) | |
%cat_7 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%add_187, %add_94], 1), kwargs = {}) | |
%view_296 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_7, [2, 32, 80, 256]), kwargs = {}) | |
%var_mean_60 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_296, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_120 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_60, 0), kwargs = {}) | |
%getitem_121 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_60, 1), kwargs = {}) | |
%add_188 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_120, 1e-05), kwargs = {}) | |
%rsqrt_60 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_188,), kwargs = {}) | |
%sub_60 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_296, %getitem_121), kwargs = {}) | |
%mul_131 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_60, %rsqrt_60), kwargs = {}) | |
%view_297 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_131, [2, 2560, 16, 16]), kwargs = {}) | |
%_param_constant374 : [#users=1] = get_attr[target=_param_constant374] | |
%unsqueeze_246 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant374, 0), kwargs = {}) | |
%unsqueeze_247 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_246, 2), kwargs = {}) | |
%unsqueeze_248 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_247, 3), kwargs = {}) | |
%_param_constant375 : [#users=1] = get_attr[target=_param_constant375] | |
%unsqueeze_249 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant375, 0), kwargs = {}) | |
%unsqueeze_250 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_249, 2), kwargs = {}) | |
%unsqueeze_251 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_250, 3), kwargs = {}) | |
%mul_132 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_297, %unsqueeze_251), kwargs = {}) | |
%add_189 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_132, %unsqueeze_248), kwargs = {}) | |
%squeeze_144 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_121, 3), kwargs = {}) | |
%squeeze_145 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_144, 2), kwargs = {}) | |
%squeeze_146 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_60, 3), kwargs = {}) | |
%squeeze_147 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_146, 2), kwargs = {}) | |
%detach_88 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_145,), kwargs = {}) | |
%detach_89 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_147,), kwargs = {}) | |
%silu_43 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_189,), kwargs = {}) | |
%_param_constant376 : [#users=1] = get_attr[target=_param_constant376] | |
%_param_constant377 : [#users=1] = get_attr[target=_param_constant377] | |
%convolution_39 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_43, %_param_constant376, %_param_constant377, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_44 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant378 : [#users=1] = get_attr[target=_param_constant378] | |
%t_112 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant378,), kwargs = {}) | |
%_param_constant379 : [#users=1] = get_attr[target=_param_constant379] | |
%addmm_56 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant379, %silu_44, %t_112), kwargs = {}) | |
%slice_51 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_56, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_52 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_51, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_252 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_52, 2), kwargs = {}) | |
%unsqueeze_253 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_252, 3), kwargs = {}) | |
%add_190 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_39, %unsqueeze_253), kwargs = {}) | |
%view_298 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_190, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_61 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_298, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_122 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_61, 0), kwargs = {}) | |
%getitem_123 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_61, 1), kwargs = {}) | |
%add_191 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_122, 1e-05), kwargs = {}) | |
%rsqrt_61 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_191,), kwargs = {}) | |
%sub_61 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_298, %getitem_123), kwargs = {}) | |
%mul_133 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_61, %rsqrt_61), kwargs = {}) | |
%view_299 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_133, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant380 : [#users=1] = get_attr[target=_param_constant380] | |
%unsqueeze_254 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant380, 0), kwargs = {}) | |
%unsqueeze_255 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_254, 2), kwargs = {}) | |
%unsqueeze_256 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_255, 3), kwargs = {}) | |
%_param_constant381 : [#users=1] = get_attr[target=_param_constant381] | |
%unsqueeze_257 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant381, 0), kwargs = {}) | |
%unsqueeze_258 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_257, 2), kwargs = {}) | |
%unsqueeze_259 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_258, 3), kwargs = {}) | |
%mul_134 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_299, %unsqueeze_259), kwargs = {}) | |
%add_192 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_134, %unsqueeze_256), kwargs = {}) | |
%squeeze_148 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_123, 3), kwargs = {}) | |
%squeeze_149 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_148, 2), kwargs = {}) | |
%squeeze_150 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_61, 3), kwargs = {}) | |
%squeeze_151 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_150, 2), kwargs = {}) | |
%detach_90 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_149,), kwargs = {}) | |
%detach_91 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_151,), kwargs = {}) | |
%silu_45 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_192,), kwargs = {}) | |
%_param_constant382 : [#users=1] = get_attr[target=_param_constant382] | |
%_param_constant383 : [#users=1] = get_attr[target=_param_constant383] | |
%convolution_40 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_45, %_param_constant382, %_param_constant383, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant384 : [#users=1] = get_attr[target=_param_constant384] | |
%_param_constant385 : [#users=1] = get_attr[target=_param_constant385] | |
%convolution_41 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_7, %_param_constant384, %_param_constant385, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_193 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_41, %convolution_40), kwargs = {}) | |
%div_15 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_193, 1.0), kwargs = {}) | |
%view_300 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_15, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_62 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_300, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_124 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_62, 0), kwargs = {}) | |
%getitem_125 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_62, 1), kwargs = {}) | |
%add_194 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_124, 1e-06), kwargs = {}) | |
%rsqrt_62 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_194,), kwargs = {}) | |
%sub_62 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_300, %getitem_125), kwargs = {}) | |
%mul_135 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_62, %rsqrt_62), kwargs = {}) | |
%view_301 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_135, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant386 : [#users=1] = get_attr[target=_param_constant386] | |
%unsqueeze_260 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant386, 0), kwargs = {}) | |
%unsqueeze_261 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_260, 2), kwargs = {}) | |
%unsqueeze_262 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_261, 3), kwargs = {}) | |
%_param_constant387 : [#users=1] = get_attr[target=_param_constant387] | |
%unsqueeze_263 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant387, 0), kwargs = {}) | |
%unsqueeze_264 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_263, 2), kwargs = {}) | |
%unsqueeze_265 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_264, 3), kwargs = {}) | |
%mul_136 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_301, %unsqueeze_265), kwargs = {}) | |
%add_195 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_136, %unsqueeze_262), kwargs = {}) | |
%squeeze_152 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_125, 3), kwargs = {}) | |
%squeeze_153 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_152, 2), kwargs = {}) | |
%squeeze_154 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_62, 3), kwargs = {}) | |
%squeeze_155 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_154, 2), kwargs = {}) | |
%detach_92 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_153,), kwargs = {}) | |
%detach_93 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_155,), kwargs = {}) | |
%permute_80 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_195, [0, 2, 3, 1]), kwargs = {}) | |
%view_302 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_80, [2, 256, 1280]), kwargs = {}) | |
%_param_constant388 : [#users=1] = get_attr[target=_param_constant388] | |
%t_113 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant388,), kwargs = {}) | |
%expand_17 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_302, [2, 256, 1280]), kwargs = {}) | |
%view_303 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_17, [2, 256, 1280]), kwargs = {}) | |
%expand_18 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_113, [2, 1280, 1280]), kwargs = {}) | |
%view_304 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_18, [2, 1280, 1280]), kwargs = {}) | |
%bmm_24 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_303, %view_304), kwargs = {}) | |
%_unsafe_view_120 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_24, [2, 256, 1280]), kwargs = {}) | |
%_param_constant389 : [#users=1] = get_attr[target=_param_constant389] | |
%add_196 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_120, %_param_constant389), kwargs = {}) | |
%var_mean_63 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_196, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_126 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_63, 0), kwargs = {}) | |
%getitem_127 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_63, 1), kwargs = {}) | |
%add_197 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_126, 1e-05), kwargs = {}) | |
%rsqrt_63 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_197,), kwargs = {}) | |
%sub_63 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_196, %getitem_127), kwargs = {}) | |
%mul_137 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_63, %rsqrt_63), kwargs = {}) | |
%_param_constant390 : [#users=1] = get_attr[target=_param_constant390] | |
%mul_138 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_137, %_param_constant390), kwargs = {}) | |
%_param_constant391 : [#users=1] = get_attr[target=_param_constant391] | |
%add_198 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_138, %_param_constant391), kwargs = {}) | |
%_param_constant392 : [#users=1] = get_attr[target=_param_constant392] | |
%t_114 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant392,), kwargs = {}) | |
%view_305 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_198, [512, 1280]), kwargs = {}) | |
%mm_48 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_305, %t_114), kwargs = {}) | |
%_unsafe_view_121 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_48, [2, 256, 1280]), kwargs = {}) | |
%view_306 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_121, [2, 256, 20, 64]), kwargs = {}) | |
%permute_81 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_306, [0, 2, 1, 3]), kwargs = {}) | |
%clone_72 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_81,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_122 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_72, [40, 256, 64]), kwargs = {}) | |
%_param_constant393 : [#users=1] = get_attr[target=_param_constant393] | |
%t_115 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant393,), kwargs = {}) | |
%view_307 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_198, [512, 1280]), kwargs = {}) | |
%mm_49 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_307, %t_115), kwargs = {}) | |
%_unsafe_view_123 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_49, [2, 256, 1280]), kwargs = {}) | |
%_param_constant394 : [#users=1] = get_attr[target=_param_constant394] | |
%t_116 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant394,), kwargs = {}) | |
%view_308 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_198, [512, 1280]), kwargs = {}) | |
%mm_50 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_308, %t_116), kwargs = {}) | |
%_unsafe_view_124 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_50, [2, 256, 1280]), kwargs = {}) | |
%view_309 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_123, [2, 256, 20, 64]), kwargs = {}) | |
%permute_82 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_309, [0, 2, 1, 3]), kwargs = {}) | |
%clone_73 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_82,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_125 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_73, [40, 256, 64]), kwargs = {}) | |
%view_310 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_124, [2, 256, 20, 64]), kwargs = {}) | |
%permute_83 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_310, [0, 2, 1, 3]), kwargs = {}) | |
%clone_74 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_83,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_126 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_74, [40, 256, 64]), kwargs = {}) | |
%empty_16 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 256, 256],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_16 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_125, -1, -2), kwargs = {}) | |
%baddbmm_16 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_16, %_unsafe_view_122, %transpose_16), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_16 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_16, -1, False), kwargs = {}) | |
%detach_94 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_16,), kwargs = {}) | |
%bmm_25 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_16, %_unsafe_view_126), kwargs = {}) | |
%view_311 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_25, [2, 20, 256, 64]), kwargs = {}) | |
%permute_84 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_311, [0, 2, 1, 3]), kwargs = {}) | |
%clone_75 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_84,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_127 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_75, [2, 256, 1280]), kwargs = {}) | |
%_param_constant395 : [#users=1] = get_attr[target=_param_constant395] | |
%t_117 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant395,), kwargs = {}) | |
%view_312 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_127, [512, 1280]), kwargs = {}) | |
%_param_constant396 : [#users=1] = get_attr[target=_param_constant396] | |
%addmm_57 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant396, %view_312, %t_117), kwargs = {}) | |
%view_313 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_57, [2, 256, 1280]), kwargs = {}) | |
%add_199 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_313, %add_196), kwargs = {}) | |
%var_mean_64 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_199, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_128 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_64, 0), kwargs = {}) | |
%getitem_129 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_64, 1), kwargs = {}) | |
%add_200 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_128, 1e-05), kwargs = {}) | |
%rsqrt_64 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_200,), kwargs = {}) | |
%sub_64 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_199, %getitem_129), kwargs = {}) | |
%mul_139 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_64, %rsqrt_64), kwargs = {}) | |
%_param_constant397 : [#users=1] = get_attr[target=_param_constant397] | |
%mul_140 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_139, %_param_constant397), kwargs = {}) | |
%_param_constant398 : [#users=1] = get_attr[target=_param_constant398] | |
%add_201 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_140, %_param_constant398), kwargs = {}) | |
%_param_constant399 : [#users=1] = get_attr[target=_param_constant399] | |
%t_118 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant399,), kwargs = {}) | |
%view_314 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_201, [512, 1280]), kwargs = {}) | |
%mm_51 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_314, %t_118), kwargs = {}) | |
%_unsafe_view_128 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_51, [2, 256, 1280]), kwargs = {}) | |
%view_315 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_128, [2, 256, 20, 64]), kwargs = {}) | |
%permute_85 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_315, [0, 2, 1, 3]), kwargs = {}) | |
%clone_76 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_85,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_129 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_76, [40, 256, 64]), kwargs = {}) | |
%_param_constant400 : [#users=1] = get_attr[target=_param_constant400] | |
%t_119 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant400,), kwargs = {}) | |
%view_316 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_52 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_316, %t_119), kwargs = {}) | |
%_unsafe_view_130 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_52, [2, 64, 1280]), kwargs = {}) | |
%_param_constant401 : [#users=1] = get_attr[target=_param_constant401] | |
%t_120 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant401,), kwargs = {}) | |
%view_317 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_53 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_317, %t_120), kwargs = {}) | |
%_unsafe_view_131 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_53, [2, 64, 1280]), kwargs = {}) | |
%view_318 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_130, [2, 64, 20, 64]), kwargs = {}) | |
%permute_86 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_318, [0, 2, 1, 3]), kwargs = {}) | |
%clone_77 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_86,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_132 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_77, [40, 64, 64]), kwargs = {}) | |
%view_319 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_131, [2, 64, 20, 64]), kwargs = {}) | |
%permute_87 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_319, [0, 2, 1, 3]), kwargs = {}) | |
%clone_78 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_87,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_133 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_78, [40, 64, 64]), kwargs = {}) | |
%empty_17 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 256, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_17 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_132, -1, -2), kwargs = {}) | |
%baddbmm_17 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_17, %_unsafe_view_129, %transpose_17), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_17 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_17, -1, False), kwargs = {}) | |
%detach_95 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_17,), kwargs = {}) | |
%bmm_26 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_17, %_unsafe_view_133), kwargs = {}) | |
%view_320 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_26, [2, 20, 256, 64]), kwargs = {}) | |
%permute_88 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_320, [0, 2, 1, 3]), kwargs = {}) | |
%clone_79 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_88,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_134 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_79, [2, 256, 1280]), kwargs = {}) | |
%_param_constant402 : [#users=1] = get_attr[target=_param_constant402] | |
%t_121 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant402,), kwargs = {}) | |
%view_321 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_134, [512, 1280]), kwargs = {}) | |
%_param_constant403 : [#users=1] = get_attr[target=_param_constant403] | |
%addmm_58 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant403, %view_321, %t_121), kwargs = {}) | |
%view_322 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_58, [2, 256, 1280]), kwargs = {}) | |
%add_202 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_322, %add_199), kwargs = {}) | |
%var_mean_65 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_202, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_130 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_65, 0), kwargs = {}) | |
%getitem_131 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_65, 1), kwargs = {}) | |
%add_203 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_130, 1e-05), kwargs = {}) | |
%rsqrt_65 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_203,), kwargs = {}) | |
%sub_65 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_202, %getitem_131), kwargs = {}) | |
%mul_141 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_65, %rsqrt_65), kwargs = {}) | |
%_param_constant404 : [#users=1] = get_attr[target=_param_constant404] | |
%mul_142 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_141, %_param_constant404), kwargs = {}) | |
%_param_constant405 : [#users=1] = get_attr[target=_param_constant405] | |
%add_204 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_142, %_param_constant405), kwargs = {}) | |
%_param_constant406 : [#users=1] = get_attr[target=_param_constant406] | |
%t_122 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant406,), kwargs = {}) | |
%view_323 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_204, [512, 1280]), kwargs = {}) | |
%_param_constant407 : [#users=1] = get_attr[target=_param_constant407] | |
%addmm_59 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant407, %view_323, %t_122), kwargs = {}) | |
%view_324 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_59, [2, 256, 10240]), kwargs = {}) | |
%slice_53 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_324, -1, 0, 5120), kwargs = {}) | |
%slice_54 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_324, -1, 5120, 10240), kwargs = {}) | |
%gelu_8 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_54,), kwargs = {}) | |
%mul_143 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_53, %gelu_8), kwargs = {}) | |
%_param_constant408 : [#users=1] = get_attr[target=_param_constant408] | |
%t_123 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant408,), kwargs = {}) | |
%view_325 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_143, [512, 5120]), kwargs = {}) | |
%_param_constant409 : [#users=1] = get_attr[target=_param_constant409] | |
%addmm_60 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant409, %view_325, %t_123), kwargs = {}) | |
%view_326 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_60, [2, 256, 1280]), kwargs = {}) | |
%add_205 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_326, %add_202), kwargs = {}) | |
%_param_constant410 : [#users=1] = get_attr[target=_param_constant410] | |
%t_124 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant410,), kwargs = {}) | |
%view_327 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_205, [512, 1280]), kwargs = {}) | |
%_param_constant411 : [#users=1] = get_attr[target=_param_constant411] | |
%addmm_61 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant411, %view_327, %t_124), kwargs = {}) | |
%view_328 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_61, [2, 256, 1280]), kwargs = {}) | |
%view_329 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_328, [2, 16, 16, 1280]), kwargs = {}) | |
%permute_89 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_329, [0, 3, 1, 2]), kwargs = {}) | |
%clone_80 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_89,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_206 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%clone_80, %div_15), kwargs = {}) | |
%cat_8 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%add_206, %convolution_11], 1), kwargs = {}) | |
%view_330 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_8, [2, 32, 60, 256]), kwargs = {}) | |
%var_mean_66 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_330, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_132 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_66, 0), kwargs = {}) | |
%getitem_133 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_66, 1), kwargs = {}) | |
%add_207 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_132, 1e-05), kwargs = {}) | |
%rsqrt_66 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_207,), kwargs = {}) | |
%sub_66 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_330, %getitem_133), kwargs = {}) | |
%mul_144 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_66, %rsqrt_66), kwargs = {}) | |
%view_331 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_144, [2, 1920, 16, 16]), kwargs = {}) | |
%_param_constant412 : [#users=1] = get_attr[target=_param_constant412] | |
%unsqueeze_266 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant412, 0), kwargs = {}) | |
%unsqueeze_267 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_266, 2), kwargs = {}) | |
%unsqueeze_268 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_267, 3), kwargs = {}) | |
%_param_constant413 : [#users=1] = get_attr[target=_param_constant413] | |
%unsqueeze_269 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant413, 0), kwargs = {}) | |
%unsqueeze_270 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_269, 2), kwargs = {}) | |
%unsqueeze_271 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_270, 3), kwargs = {}) | |
%mul_145 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_331, %unsqueeze_271), kwargs = {}) | |
%add_208 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_145, %unsqueeze_268), kwargs = {}) | |
%squeeze_156 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_133, 3), kwargs = {}) | |
%squeeze_157 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_156, 2), kwargs = {}) | |
%squeeze_158 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_66, 3), kwargs = {}) | |
%squeeze_159 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_158, 2), kwargs = {}) | |
%detach_96 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_157,), kwargs = {}) | |
%detach_97 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_159,), kwargs = {}) | |
%silu_46 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_208,), kwargs = {}) | |
%_param_constant414 : [#users=1] = get_attr[target=_param_constant414] | |
%_param_constant415 : [#users=1] = get_attr[target=_param_constant415] | |
%convolution_42 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_46, %_param_constant414, %_param_constant415, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_47 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant416 : [#users=1] = get_attr[target=_param_constant416] | |
%t_125 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant416,), kwargs = {}) | |
%_param_constant417 : [#users=1] = get_attr[target=_param_constant417] | |
%addmm_62 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant417, %silu_47, %t_125), kwargs = {}) | |
%slice_55 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_62, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_56 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_55, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_272 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_56, 2), kwargs = {}) | |
%unsqueeze_273 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_272, 3), kwargs = {}) | |
%add_209 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_42, %unsqueeze_273), kwargs = {}) | |
%view_332 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_209, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_67 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_332, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_134 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_67, 0), kwargs = {}) | |
%getitem_135 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_67, 1), kwargs = {}) | |
%add_210 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_134, 1e-05), kwargs = {}) | |
%rsqrt_67 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_210,), kwargs = {}) | |
%sub_67 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_332, %getitem_135), kwargs = {}) | |
%mul_146 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_67, %rsqrt_67), kwargs = {}) | |
%view_333 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_146, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant418 : [#users=1] = get_attr[target=_param_constant418] | |
%unsqueeze_274 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant418, 0), kwargs = {}) | |
%unsqueeze_275 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_274, 2), kwargs = {}) | |
%unsqueeze_276 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_275, 3), kwargs = {}) | |
%_param_constant419 : [#users=1] = get_attr[target=_param_constant419] | |
%unsqueeze_277 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant419, 0), kwargs = {}) | |
%unsqueeze_278 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_277, 2), kwargs = {}) | |
%unsqueeze_279 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_278, 3), kwargs = {}) | |
%mul_147 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_333, %unsqueeze_279), kwargs = {}) | |
%add_211 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_147, %unsqueeze_276), kwargs = {}) | |
%squeeze_160 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_135, 3), kwargs = {}) | |
%squeeze_161 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_160, 2), kwargs = {}) | |
%squeeze_162 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_67, 3), kwargs = {}) | |
%squeeze_163 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_162, 2), kwargs = {}) | |
%detach_98 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_161,), kwargs = {}) | |
%detach_99 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_163,), kwargs = {}) | |
%silu_48 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_211,), kwargs = {}) | |
%_param_constant420 : [#users=1] = get_attr[target=_param_constant420] | |
%_param_constant421 : [#users=1] = get_attr[target=_param_constant421] | |
%convolution_43 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_48, %_param_constant420, %_param_constant421, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant422 : [#users=1] = get_attr[target=_param_constant422] | |
%_param_constant423 : [#users=1] = get_attr[target=_param_constant423] | |
%convolution_44 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_8, %_param_constant422, %_param_constant423, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_212 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_44, %convolution_43), kwargs = {}) | |
%div_16 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_212, 1.0), kwargs = {}) | |
%view_334 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_16, [2, 32, 40, 256]), kwargs = {}) | |
%var_mean_68 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_334, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_136 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_68, 0), kwargs = {}) | |
%getitem_137 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_68, 1), kwargs = {}) | |
%add_213 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_136, 1e-06), kwargs = {}) | |
%rsqrt_68 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_213,), kwargs = {}) | |
%sub_68 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_334, %getitem_137), kwargs = {}) | |
%mul_148 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_68, %rsqrt_68), kwargs = {}) | |
%view_335 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_148, [2, 1280, 16, 16]), kwargs = {}) | |
%_param_constant424 : [#users=1] = get_attr[target=_param_constant424] | |
%unsqueeze_280 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant424, 0), kwargs = {}) | |
%unsqueeze_281 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_280, 2), kwargs = {}) | |
%unsqueeze_282 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_281, 3), kwargs = {}) | |
%_param_constant425 : [#users=1] = get_attr[target=_param_constant425] | |
%unsqueeze_283 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant425, 0), kwargs = {}) | |
%unsqueeze_284 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_283, 2), kwargs = {}) | |
%unsqueeze_285 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_284, 3), kwargs = {}) | |
%mul_149 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_335, %unsqueeze_285), kwargs = {}) | |
%add_214 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_149, %unsqueeze_282), kwargs = {}) | |
%squeeze_164 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_137, 3), kwargs = {}) | |
%squeeze_165 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_164, 2), kwargs = {}) | |
%squeeze_166 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_68, 3), kwargs = {}) | |
%squeeze_167 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_166, 2), kwargs = {}) | |
%detach_100 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_165,), kwargs = {}) | |
%detach_101 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_167,), kwargs = {}) | |
%permute_90 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_214, [0, 2, 3, 1]), kwargs = {}) | |
%view_336 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_90, [2, 256, 1280]), kwargs = {}) | |
%_param_constant426 : [#users=1] = get_attr[target=_param_constant426] | |
%t_126 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant426,), kwargs = {}) | |
%expand_19 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_336, [2, 256, 1280]), kwargs = {}) | |
%view_337 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_19, [2, 256, 1280]), kwargs = {}) | |
%expand_20 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_126, [2, 1280, 1280]), kwargs = {}) | |
%view_338 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_20, [2, 1280, 1280]), kwargs = {}) | |
%bmm_27 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_337, %view_338), kwargs = {}) | |
%_unsafe_view_135 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_27, [2, 256, 1280]), kwargs = {}) | |
%_param_constant427 : [#users=1] = get_attr[target=_param_constant427] | |
%add_215 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_135, %_param_constant427), kwargs = {}) | |
%var_mean_69 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_215, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_138 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_69, 0), kwargs = {}) | |
%getitem_139 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_69, 1), kwargs = {}) | |
%add_216 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_138, 1e-05), kwargs = {}) | |
%rsqrt_69 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_216,), kwargs = {}) | |
%sub_69 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_215, %getitem_139), kwargs = {}) | |
%mul_150 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_69, %rsqrt_69), kwargs = {}) | |
%_param_constant428 : [#users=1] = get_attr[target=_param_constant428] | |
%mul_151 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_150, %_param_constant428), kwargs = {}) | |
%_param_constant429 : [#users=1] = get_attr[target=_param_constant429] | |
%add_217 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_151, %_param_constant429), kwargs = {}) | |
%_param_constant430 : [#users=1] = get_attr[target=_param_constant430] | |
%t_127 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant430,), kwargs = {}) | |
%view_339 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_217, [512, 1280]), kwargs = {}) | |
%mm_54 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_339, %t_127), kwargs = {}) | |
%_unsafe_view_136 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_54, [2, 256, 1280]), kwargs = {}) | |
%view_340 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_136, [2, 256, 20, 64]), kwargs = {}) | |
%permute_91 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_340, [0, 2, 1, 3]), kwargs = {}) | |
%clone_81 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_91,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_137 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_81, [40, 256, 64]), kwargs = {}) | |
%_param_constant431 : [#users=1] = get_attr[target=_param_constant431] | |
%t_128 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant431,), kwargs = {}) | |
%view_341 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_217, [512, 1280]), kwargs = {}) | |
%mm_55 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_341, %t_128), kwargs = {}) | |
%_unsafe_view_138 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_55, [2, 256, 1280]), kwargs = {}) | |
%_param_constant432 : [#users=1] = get_attr[target=_param_constant432] | |
%t_129 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant432,), kwargs = {}) | |
%view_342 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_217, [512, 1280]), kwargs = {}) | |
%mm_56 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_342, %t_129), kwargs = {}) | |
%_unsafe_view_139 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_56, [2, 256, 1280]), kwargs = {}) | |
%view_343 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_138, [2, 256, 20, 64]), kwargs = {}) | |
%permute_92 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_343, [0, 2, 1, 3]), kwargs = {}) | |
%clone_82 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_92,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_140 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_82, [40, 256, 64]), kwargs = {}) | |
%view_344 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_139, [2, 256, 20, 64]), kwargs = {}) | |
%permute_93 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_344, [0, 2, 1, 3]), kwargs = {}) | |
%clone_83 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_93,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_141 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_83, [40, 256, 64]), kwargs = {}) | |
%empty_18 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 256, 256],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_18 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_140, -1, -2), kwargs = {}) | |
%baddbmm_18 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_18, %_unsafe_view_137, %transpose_18), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_18 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_18, -1, False), kwargs = {}) | |
%detach_102 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_18,), kwargs = {}) | |
%bmm_28 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_18, %_unsafe_view_141), kwargs = {}) | |
%view_345 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_28, [2, 20, 256, 64]), kwargs = {}) | |
%permute_94 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_345, [0, 2, 1, 3]), kwargs = {}) | |
%clone_84 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_94,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_142 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_84, [2, 256, 1280]), kwargs = {}) | |
%_param_constant433 : [#users=1] = get_attr[target=_param_constant433] | |
%t_130 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant433,), kwargs = {}) | |
%view_346 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_142, [512, 1280]), kwargs = {}) | |
%_param_constant434 : [#users=1] = get_attr[target=_param_constant434] | |
%addmm_63 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant434, %view_346, %t_130), kwargs = {}) | |
%view_347 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_63, [2, 256, 1280]), kwargs = {}) | |
%add_218 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_347, %add_215), kwargs = {}) | |
%var_mean_70 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_218, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_140 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_70, 0), kwargs = {}) | |
%getitem_141 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_70, 1), kwargs = {}) | |
%add_219 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_140, 1e-05), kwargs = {}) | |
%rsqrt_70 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_219,), kwargs = {}) | |
%sub_70 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_218, %getitem_141), kwargs = {}) | |
%mul_152 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_70, %rsqrt_70), kwargs = {}) | |
%_param_constant435 : [#users=1] = get_attr[target=_param_constant435] | |
%mul_153 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_152, %_param_constant435), kwargs = {}) | |
%_param_constant436 : [#users=1] = get_attr[target=_param_constant436] | |
%add_220 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_153, %_param_constant436), kwargs = {}) | |
%_param_constant437 : [#users=1] = get_attr[target=_param_constant437] | |
%t_131 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant437,), kwargs = {}) | |
%view_348 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_220, [512, 1280]), kwargs = {}) | |
%mm_57 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_348, %t_131), kwargs = {}) | |
%_unsafe_view_143 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_57, [2, 256, 1280]), kwargs = {}) | |
%view_349 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_143, [2, 256, 20, 64]), kwargs = {}) | |
%permute_95 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_349, [0, 2, 1, 3]), kwargs = {}) | |
%clone_85 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_95,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_144 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_85, [40, 256, 64]), kwargs = {}) | |
%_param_constant438 : [#users=1] = get_attr[target=_param_constant438] | |
%t_132 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant438,), kwargs = {}) | |
%view_350 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_58 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_350, %t_132), kwargs = {}) | |
%_unsafe_view_145 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_58, [2, 64, 1280]), kwargs = {}) | |
%_param_constant439 : [#users=1] = get_attr[target=_param_constant439] | |
%t_133 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant439,), kwargs = {}) | |
%view_351 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_59 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_351, %t_133), kwargs = {}) | |
%_unsafe_view_146 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_59, [2, 64, 1280]), kwargs = {}) | |
%view_352 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_145, [2, 64, 20, 64]), kwargs = {}) | |
%permute_96 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_352, [0, 2, 1, 3]), kwargs = {}) | |
%clone_86 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_96,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_147 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_86, [40, 64, 64]), kwargs = {}) | |
%view_353 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_146, [2, 64, 20, 64]), kwargs = {}) | |
%permute_97 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_353, [0, 2, 1, 3]), kwargs = {}) | |
%clone_87 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_97,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_148 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_87, [40, 64, 64]), kwargs = {}) | |
%empty_19 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([40, 256, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_19 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_147, -1, -2), kwargs = {}) | |
%baddbmm_19 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_19, %_unsafe_view_144, %transpose_19), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_19 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_19, -1, False), kwargs = {}) | |
%detach_103 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_19,), kwargs = {}) | |
%bmm_29 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_19, %_unsafe_view_148), kwargs = {}) | |
%view_354 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_29, [2, 20, 256, 64]), kwargs = {}) | |
%permute_98 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_354, [0, 2, 1, 3]), kwargs = {}) | |
%clone_88 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_98,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_149 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_88, [2, 256, 1280]), kwargs = {}) | |
%_param_constant440 : [#users=1] = get_attr[target=_param_constant440] | |
%t_134 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant440,), kwargs = {}) | |
%view_355 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_149, [512, 1280]), kwargs = {}) | |
%_param_constant441 : [#users=1] = get_attr[target=_param_constant441] | |
%addmm_64 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant441, %view_355, %t_134), kwargs = {}) | |
%view_356 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_64, [2, 256, 1280]), kwargs = {}) | |
%add_221 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_356, %add_218), kwargs = {}) | |
%var_mean_71 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_221, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_142 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_71, 0), kwargs = {}) | |
%getitem_143 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_71, 1), kwargs = {}) | |
%add_222 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_142, 1e-05), kwargs = {}) | |
%rsqrt_71 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_222,), kwargs = {}) | |
%sub_71 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_221, %getitem_143), kwargs = {}) | |
%mul_154 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_71, %rsqrt_71), kwargs = {}) | |
%_param_constant442 : [#users=1] = get_attr[target=_param_constant442] | |
%mul_155 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_154, %_param_constant442), kwargs = {}) | |
%_param_constant443 : [#users=1] = get_attr[target=_param_constant443] | |
%add_223 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_155, %_param_constant443), kwargs = {}) | |
%_param_constant444 : [#users=1] = get_attr[target=_param_constant444] | |
%t_135 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant444,), kwargs = {}) | |
%view_357 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_223, [512, 1280]), kwargs = {}) | |
%_param_constant445 : [#users=1] = get_attr[target=_param_constant445] | |
%addmm_65 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant445, %view_357, %t_135), kwargs = {}) | |
%view_358 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_65, [2, 256, 10240]), kwargs = {}) | |
%slice_57 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_358, -1, 0, 5120), kwargs = {}) | |
%slice_58 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_358, -1, 5120, 10240), kwargs = {}) | |
%gelu_9 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_58,), kwargs = {}) | |
%mul_156 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_57, %gelu_9), kwargs = {}) | |
%_param_constant446 : [#users=1] = get_attr[target=_param_constant446] | |
%t_136 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant446,), kwargs = {}) | |
%view_359 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_156, [512, 5120]), kwargs = {}) | |
%_param_constant447 : [#users=1] = get_attr[target=_param_constant447] | |
%addmm_66 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant447, %view_359, %t_136), kwargs = {}) | |
%view_360 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_66, [2, 256, 1280]), kwargs = {}) | |
%add_224 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_360, %add_221), kwargs = {}) | |
%_param_constant448 : [#users=1] = get_attr[target=_param_constant448] | |
%t_137 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant448,), kwargs = {}) | |
%view_361 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_224, [512, 1280]), kwargs = {}) | |
%_param_constant449 : [#users=1] = get_attr[target=_param_constant449] | |
%addmm_67 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant449, %view_361, %t_137), kwargs = {}) | |
%view_362 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_67, [2, 256, 1280]), kwargs = {}) | |
%view_363 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_362, [2, 16, 16, 1280]), kwargs = {}) | |
%permute_99 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_363, [0, 3, 1, 2]), kwargs = {}) | |
%clone_89 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_99,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_225 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%clone_89, %div_16), kwargs = {}) | |
%upsample_nearest2d_1 : [#users=1] = call_function[target=torch.ops.aten.upsample_nearest2d](args = (%add_225, [32, 32], 2.0, 2.0), kwargs = {}) | |
%_param_constant450 : [#users=1] = get_attr[target=_param_constant450] | |
%_param_constant451 : [#users=1] = get_attr[target=_param_constant451] | |
%convolution_45 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%upsample_nearest2d_1, %_param_constant450, %_param_constant451, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%cat_9 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%convolution_45, %add_75], 1), kwargs = {}) | |
%view_364 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_9, [2, 32, 60, 1024]), kwargs = {}) | |
%var_mean_72 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_364, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_144 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_72, 0), kwargs = {}) | |
%getitem_145 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_72, 1), kwargs = {}) | |
%add_226 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_144, 1e-05), kwargs = {}) | |
%rsqrt_72 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_226,), kwargs = {}) | |
%sub_72 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_364, %getitem_145), kwargs = {}) | |
%mul_157 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_72, %rsqrt_72), kwargs = {}) | |
%view_365 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_157, [2, 1920, 32, 32]), kwargs = {}) | |
%_param_constant452 : [#users=1] = get_attr[target=_param_constant452] | |
%unsqueeze_286 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant452, 0), kwargs = {}) | |
%unsqueeze_287 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_286, 2), kwargs = {}) | |
%unsqueeze_288 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_287, 3), kwargs = {}) | |
%_param_constant453 : [#users=1] = get_attr[target=_param_constant453] | |
%unsqueeze_289 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant453, 0), kwargs = {}) | |
%unsqueeze_290 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_289, 2), kwargs = {}) | |
%unsqueeze_291 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_290, 3), kwargs = {}) | |
%mul_158 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_365, %unsqueeze_291), kwargs = {}) | |
%add_227 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_158, %unsqueeze_288), kwargs = {}) | |
%squeeze_168 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_145, 3), kwargs = {}) | |
%squeeze_169 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_168, 2), kwargs = {}) | |
%squeeze_170 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_72, 3), kwargs = {}) | |
%squeeze_171 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_170, 2), kwargs = {}) | |
%detach_104 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_169,), kwargs = {}) | |
%detach_105 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_171,), kwargs = {}) | |
%silu_49 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_227,), kwargs = {}) | |
%_param_constant454 : [#users=1] = get_attr[target=_param_constant454] | |
%_param_constant455 : [#users=1] = get_attr[target=_param_constant455] | |
%convolution_46 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_49, %_param_constant454, %_param_constant455, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_50 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant456 : [#users=1] = get_attr[target=_param_constant456] | |
%t_138 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant456,), kwargs = {}) | |
%_param_constant457 : [#users=1] = get_attr[target=_param_constant457] | |
%addmm_68 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant457, %silu_50, %t_138), kwargs = {}) | |
%slice_59 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_68, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_60 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_59, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_292 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_60, 2), kwargs = {}) | |
%unsqueeze_293 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_292, 3), kwargs = {}) | |
%add_228 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_46, %unsqueeze_293), kwargs = {}) | |
%view_366 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_228, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_73 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_366, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_146 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_73, 0), kwargs = {}) | |
%getitem_147 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_73, 1), kwargs = {}) | |
%add_229 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_146, 1e-05), kwargs = {}) | |
%rsqrt_73 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_229,), kwargs = {}) | |
%sub_73 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_366, %getitem_147), kwargs = {}) | |
%mul_159 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_73, %rsqrt_73), kwargs = {}) | |
%view_367 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_159, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant458 : [#users=1] = get_attr[target=_param_constant458] | |
%unsqueeze_294 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant458, 0), kwargs = {}) | |
%unsqueeze_295 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_294, 2), kwargs = {}) | |
%unsqueeze_296 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_295, 3), kwargs = {}) | |
%_param_constant459 : [#users=1] = get_attr[target=_param_constant459] | |
%unsqueeze_297 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant459, 0), kwargs = {}) | |
%unsqueeze_298 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_297, 2), kwargs = {}) | |
%unsqueeze_299 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_298, 3), kwargs = {}) | |
%mul_160 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_367, %unsqueeze_299), kwargs = {}) | |
%add_230 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_160, %unsqueeze_296), kwargs = {}) | |
%squeeze_172 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_147, 3), kwargs = {}) | |
%squeeze_173 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_172, 2), kwargs = {}) | |
%squeeze_174 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_73, 3), kwargs = {}) | |
%squeeze_175 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_174, 2), kwargs = {}) | |
%detach_106 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_173,), kwargs = {}) | |
%detach_107 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_175,), kwargs = {}) | |
%silu_51 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_230,), kwargs = {}) | |
%_param_constant460 : [#users=1] = get_attr[target=_param_constant460] | |
%_param_constant461 : [#users=1] = get_attr[target=_param_constant461] | |
%convolution_47 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_51, %_param_constant460, %_param_constant461, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant462 : [#users=1] = get_attr[target=_param_constant462] | |
%_param_constant463 : [#users=1] = get_attr[target=_param_constant463] | |
%convolution_48 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_9, %_param_constant462, %_param_constant463, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_231 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_48, %convolution_47), kwargs = {}) | |
%div_17 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_231, 1.0), kwargs = {}) | |
%view_368 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_17, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_74 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_368, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_148 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_74, 0), kwargs = {}) | |
%getitem_149 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_74, 1), kwargs = {}) | |
%add_232 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_148, 1e-06), kwargs = {}) | |
%rsqrt_74 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_232,), kwargs = {}) | |
%sub_74 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_368, %getitem_149), kwargs = {}) | |
%mul_161 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_74, %rsqrt_74), kwargs = {}) | |
%view_369 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_161, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant464 : [#users=1] = get_attr[target=_param_constant464] | |
%unsqueeze_300 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant464, 0), kwargs = {}) | |
%unsqueeze_301 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_300, 2), kwargs = {}) | |
%unsqueeze_302 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_301, 3), kwargs = {}) | |
%_param_constant465 : [#users=1] = get_attr[target=_param_constant465] | |
%unsqueeze_303 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant465, 0), kwargs = {}) | |
%unsqueeze_304 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_303, 2), kwargs = {}) | |
%unsqueeze_305 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_304, 3), kwargs = {}) | |
%mul_162 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_369, %unsqueeze_305), kwargs = {}) | |
%add_233 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_162, %unsqueeze_302), kwargs = {}) | |
%squeeze_176 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_149, 3), kwargs = {}) | |
%squeeze_177 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_176, 2), kwargs = {}) | |
%squeeze_178 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_74, 3), kwargs = {}) | |
%squeeze_179 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_178, 2), kwargs = {}) | |
%detach_108 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_177,), kwargs = {}) | |
%detach_109 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_179,), kwargs = {}) | |
%permute_100 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_233, [0, 2, 3, 1]), kwargs = {}) | |
%view_370 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_100, [2, 1024, 640]), kwargs = {}) | |
%_param_constant466 : [#users=1] = get_attr[target=_param_constant466] | |
%t_139 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant466,), kwargs = {}) | |
%expand_21 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_370, [2, 1024, 640]), kwargs = {}) | |
%view_371 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_21, [2, 1024, 640]), kwargs = {}) | |
%expand_22 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_139, [2, 640, 640]), kwargs = {}) | |
%view_372 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_22, [2, 640, 640]), kwargs = {}) | |
%bmm_30 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_371, %view_372), kwargs = {}) | |
%_unsafe_view_150 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_30, [2, 1024, 640]), kwargs = {}) | |
%_param_constant467 : [#users=1] = get_attr[target=_param_constant467] | |
%add_234 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_150, %_param_constant467), kwargs = {}) | |
%var_mean_75 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_234, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_150 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_75, 0), kwargs = {}) | |
%getitem_151 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_75, 1), kwargs = {}) | |
%add_235 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_150, 1e-05), kwargs = {}) | |
%rsqrt_75 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_235,), kwargs = {}) | |
%sub_75 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_234, %getitem_151), kwargs = {}) | |
%mul_163 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_75, %rsqrt_75), kwargs = {}) | |
%_param_constant468 : [#users=1] = get_attr[target=_param_constant468] | |
%mul_164 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_163, %_param_constant468), kwargs = {}) | |
%_param_constant469 : [#users=1] = get_attr[target=_param_constant469] | |
%add_236 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_164, %_param_constant469), kwargs = {}) | |
%_param_constant470 : [#users=1] = get_attr[target=_param_constant470] | |
%t_140 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant470,), kwargs = {}) | |
%view_373 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_236, [2048, 640]), kwargs = {}) | |
%mm_60 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_373, %t_140), kwargs = {}) | |
%_unsafe_view_151 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_60, [2, 1024, 640]), kwargs = {}) | |
%view_374 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_151, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_101 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_374, [0, 2, 1, 3]), kwargs = {}) | |
%clone_90 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_101,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_152 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_90, [20, 1024, 64]), kwargs = {}) | |
%_param_constant471 : [#users=1] = get_attr[target=_param_constant471] | |
%t_141 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant471,), kwargs = {}) | |
%view_375 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_236, [2048, 640]), kwargs = {}) | |
%mm_61 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_375, %t_141), kwargs = {}) | |
%_unsafe_view_153 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_61, [2, 1024, 640]), kwargs = {}) | |
%_param_constant472 : [#users=1] = get_attr[target=_param_constant472] | |
%t_142 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant472,), kwargs = {}) | |
%view_376 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_236, [2048, 640]), kwargs = {}) | |
%mm_62 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_376, %t_142), kwargs = {}) | |
%_unsafe_view_154 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_62, [2, 1024, 640]), kwargs = {}) | |
%view_377 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_153, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_102 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_377, [0, 2, 1, 3]), kwargs = {}) | |
%clone_91 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_102,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_155 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_91, [20, 1024, 64]), kwargs = {}) | |
%view_378 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_154, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_103 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_378, [0, 2, 1, 3]), kwargs = {}) | |
%clone_92 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_103,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_156 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_92, [20, 1024, 64]), kwargs = {}) | |
%empty_20 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([20, 1024, 1024],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_20 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_155, -1, -2), kwargs = {}) | |
%baddbmm_20 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_20, %_unsafe_view_152, %transpose_20), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_20 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_20, -1, False), kwargs = {}) | |
%detach_110 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_20,), kwargs = {}) | |
%bmm_31 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_20, %_unsafe_view_156), kwargs = {}) | |
%view_379 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_31, [2, 10, 1024, 64]), kwargs = {}) | |
%permute_104 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_379, [0, 2, 1, 3]), kwargs = {}) | |
%clone_93 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_104,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_157 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_93, [2, 1024, 640]), kwargs = {}) | |
%_param_constant473 : [#users=1] = get_attr[target=_param_constant473] | |
%t_143 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant473,), kwargs = {}) | |
%view_380 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_157, [2048, 640]), kwargs = {}) | |
%_param_constant474 : [#users=1] = get_attr[target=_param_constant474] | |
%addmm_69 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant474, %view_380, %t_143), kwargs = {}) | |
%view_381 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_69, [2, 1024, 640]), kwargs = {}) | |
%add_237 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_381, %add_234), kwargs = {}) | |
%var_mean_76 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_237, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_152 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_76, 0), kwargs = {}) | |
%getitem_153 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_76, 1), kwargs = {}) | |
%add_238 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_152, 1e-05), kwargs = {}) | |
%rsqrt_76 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_238,), kwargs = {}) | |
%sub_76 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_237, %getitem_153), kwargs = {}) | |
%mul_165 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_76, %rsqrt_76), kwargs = {}) | |
%_param_constant475 : [#users=1] = get_attr[target=_param_constant475] | |
%mul_166 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_165, %_param_constant475), kwargs = {}) | |
%_param_constant476 : [#users=1] = get_attr[target=_param_constant476] | |
%add_239 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_166, %_param_constant476), kwargs = {}) | |
%_param_constant477 : [#users=1] = get_attr[target=_param_constant477] | |
%t_144 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant477,), kwargs = {}) | |
%view_382 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_239, [2048, 640]), kwargs = {}) | |
%mm_63 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_382, %t_144), kwargs = {}) | |
%_unsafe_view_158 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_63, [2, 1024, 640]), kwargs = {}) | |
%view_383 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_158, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_105 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_383, [0, 2, 1, 3]), kwargs = {}) | |
%clone_94 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_105,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_159 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_94, [20, 1024, 64]), kwargs = {}) | |
%_param_constant478 : [#users=1] = get_attr[target=_param_constant478] | |
%t_145 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant478,), kwargs = {}) | |
%view_384 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_64 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_384, %t_145), kwargs = {}) | |
%_unsafe_view_160 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_64, [2, 64, 640]), kwargs = {}) | |
%_param_constant479 : [#users=1] = get_attr[target=_param_constant479] | |
%t_146 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant479,), kwargs = {}) | |
%view_385 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_65 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_385, %t_146), kwargs = {}) | |
%_unsafe_view_161 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_65, [2, 64, 640]), kwargs = {}) | |
%view_386 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_160, [2, 64, 10, 64]), kwargs = {}) | |
%permute_106 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_386, [0, 2, 1, 3]), kwargs = {}) | |
%clone_95 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_106,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_162 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_95, [20, 64, 64]), kwargs = {}) | |
%view_387 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_161, [2, 64, 10, 64]), kwargs = {}) | |
%permute_107 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_387, [0, 2, 1, 3]), kwargs = {}) | |
%clone_96 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_107,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_163 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_96, [20, 64, 64]), kwargs = {}) | |
%empty_21 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([20, 1024, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_21 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_162, -1, -2), kwargs = {}) | |
%baddbmm_21 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_21, %_unsafe_view_159, %transpose_21), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_21 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_21, -1, False), kwargs = {}) | |
%detach_111 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_21,), kwargs = {}) | |
%bmm_32 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_21, %_unsafe_view_163), kwargs = {}) | |
%view_388 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_32, [2, 10, 1024, 64]), kwargs = {}) | |
%permute_108 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_388, [0, 2, 1, 3]), kwargs = {}) | |
%clone_97 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_108,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_164 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_97, [2, 1024, 640]), kwargs = {}) | |
%_param_constant480 : [#users=1] = get_attr[target=_param_constant480] | |
%t_147 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant480,), kwargs = {}) | |
%view_389 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_164, [2048, 640]), kwargs = {}) | |
%_param_constant481 : [#users=1] = get_attr[target=_param_constant481] | |
%addmm_70 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant481, %view_389, %t_147), kwargs = {}) | |
%view_390 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_70, [2, 1024, 640]), kwargs = {}) | |
%add_240 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_390, %add_237), kwargs = {}) | |
%var_mean_77 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_240, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_154 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_77, 0), kwargs = {}) | |
%getitem_155 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_77, 1), kwargs = {}) | |
%add_241 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_154, 1e-05), kwargs = {}) | |
%rsqrt_77 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_241,), kwargs = {}) | |
%sub_77 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_240, %getitem_155), kwargs = {}) | |
%mul_167 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_77, %rsqrt_77), kwargs = {}) | |
%_param_constant482 : [#users=1] = get_attr[target=_param_constant482] | |
%mul_168 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_167, %_param_constant482), kwargs = {}) | |
%_param_constant483 : [#users=1] = get_attr[target=_param_constant483] | |
%add_242 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_168, %_param_constant483), kwargs = {}) | |
%_param_constant484 : [#users=1] = get_attr[target=_param_constant484] | |
%t_148 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant484,), kwargs = {}) | |
%view_391 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_242, [2048, 640]), kwargs = {}) | |
%_param_constant485 : [#users=1] = get_attr[target=_param_constant485] | |
%addmm_71 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant485, %view_391, %t_148), kwargs = {}) | |
%view_392 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_71, [2, 1024, 5120]), kwargs = {}) | |
%slice_61 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_392, -1, 0, 2560), kwargs = {}) | |
%slice_62 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_392, -1, 2560, 5120), kwargs = {}) | |
%gelu_10 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_62,), kwargs = {}) | |
%mul_169 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_61, %gelu_10), kwargs = {}) | |
%_param_constant486 : [#users=1] = get_attr[target=_param_constant486] | |
%t_149 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant486,), kwargs = {}) | |
%view_393 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_169, [2048, 2560]), kwargs = {}) | |
%_param_constant487 : [#users=1] = get_attr[target=_param_constant487] | |
%addmm_72 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant487, %view_393, %t_149), kwargs = {}) | |
%view_394 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_72, [2, 1024, 640]), kwargs = {}) | |
%add_243 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_394, %add_240), kwargs = {}) | |
%_param_constant488 : [#users=1] = get_attr[target=_param_constant488] | |
%t_150 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant488,), kwargs = {}) | |
%view_395 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_243, [2048, 640]), kwargs = {}) | |
%_param_constant489 : [#users=1] = get_attr[target=_param_constant489] | |
%addmm_73 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant489, %view_395, %t_150), kwargs = {}) | |
%view_396 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_73, [2, 1024, 640]), kwargs = {}) | |
%view_397 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_396, [2, 32, 32, 640]), kwargs = {}) | |
%permute_109 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_397, [0, 3, 1, 2]), kwargs = {}) | |
%clone_98 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_109,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_244 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%clone_98, %div_17), kwargs = {}) | |
%cat_10 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%add_244, %add_56], 1), kwargs = {}) | |
%view_398 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_10, [2, 32, 40, 1024]), kwargs = {}) | |
%var_mean_78 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_398, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_156 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_78, 0), kwargs = {}) | |
%getitem_157 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_78, 1), kwargs = {}) | |
%add_245 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_156, 1e-05), kwargs = {}) | |
%rsqrt_78 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_245,), kwargs = {}) | |
%sub_78 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_398, %getitem_157), kwargs = {}) | |
%mul_170 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_78, %rsqrt_78), kwargs = {}) | |
%view_399 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_170, [2, 1280, 32, 32]), kwargs = {}) | |
%_param_constant490 : [#users=1] = get_attr[target=_param_constant490] | |
%unsqueeze_306 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant490, 0), kwargs = {}) | |
%unsqueeze_307 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_306, 2), kwargs = {}) | |
%unsqueeze_308 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_307, 3), kwargs = {}) | |
%_param_constant491 : [#users=1] = get_attr[target=_param_constant491] | |
%unsqueeze_309 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant491, 0), kwargs = {}) | |
%unsqueeze_310 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_309, 2), kwargs = {}) | |
%unsqueeze_311 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_310, 3), kwargs = {}) | |
%mul_171 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_399, %unsqueeze_311), kwargs = {}) | |
%add_246 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_171, %unsqueeze_308), kwargs = {}) | |
%squeeze_180 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_157, 3), kwargs = {}) | |
%squeeze_181 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_180, 2), kwargs = {}) | |
%squeeze_182 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_78, 3), kwargs = {}) | |
%squeeze_183 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_182, 2), kwargs = {}) | |
%detach_112 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_181,), kwargs = {}) | |
%detach_113 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_183,), kwargs = {}) | |
%silu_52 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_246,), kwargs = {}) | |
%_param_constant492 : [#users=1] = get_attr[target=_param_constant492] | |
%_param_constant493 : [#users=1] = get_attr[target=_param_constant493] | |
%convolution_49 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_52, %_param_constant492, %_param_constant493, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_53 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant494 : [#users=1] = get_attr[target=_param_constant494] | |
%t_151 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant494,), kwargs = {}) | |
%_param_constant495 : [#users=1] = get_attr[target=_param_constant495] | |
%addmm_74 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant495, %silu_53, %t_151), kwargs = {}) | |
%slice_63 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_74, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_64 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_63, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_312 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_64, 2), kwargs = {}) | |
%unsqueeze_313 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_312, 3), kwargs = {}) | |
%add_247 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_49, %unsqueeze_313), kwargs = {}) | |
%view_400 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_247, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_79 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_400, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_158 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_79, 0), kwargs = {}) | |
%getitem_159 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_79, 1), kwargs = {}) | |
%add_248 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_158, 1e-05), kwargs = {}) | |
%rsqrt_79 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_248,), kwargs = {}) | |
%sub_79 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_400, %getitem_159), kwargs = {}) | |
%mul_172 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_79, %rsqrt_79), kwargs = {}) | |
%view_401 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_172, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant496 : [#users=1] = get_attr[target=_param_constant496] | |
%unsqueeze_314 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant496, 0), kwargs = {}) | |
%unsqueeze_315 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_314, 2), kwargs = {}) | |
%unsqueeze_316 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_315, 3), kwargs = {}) | |
%_param_constant497 : [#users=1] = get_attr[target=_param_constant497] | |
%unsqueeze_317 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant497, 0), kwargs = {}) | |
%unsqueeze_318 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_317, 2), kwargs = {}) | |
%unsqueeze_319 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_318, 3), kwargs = {}) | |
%mul_173 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_401, %unsqueeze_319), kwargs = {}) | |
%add_249 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_173, %unsqueeze_316), kwargs = {}) | |
%squeeze_184 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_159, 3), kwargs = {}) | |
%squeeze_185 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_184, 2), kwargs = {}) | |
%squeeze_186 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_79, 3), kwargs = {}) | |
%squeeze_187 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_186, 2), kwargs = {}) | |
%detach_114 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_185,), kwargs = {}) | |
%detach_115 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_187,), kwargs = {}) | |
%silu_54 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_249,), kwargs = {}) | |
%_param_constant498 : [#users=1] = get_attr[target=_param_constant498] | |
%_param_constant499 : [#users=1] = get_attr[target=_param_constant499] | |
%convolution_50 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_54, %_param_constant498, %_param_constant499, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant500 : [#users=1] = get_attr[target=_param_constant500] | |
%_param_constant501 : [#users=1] = get_attr[target=_param_constant501] | |
%convolution_51 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_10, %_param_constant500, %_param_constant501, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_250 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_51, %convolution_50), kwargs = {}) | |
%div_18 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_250, 1.0), kwargs = {}) | |
%view_402 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_18, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_80 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_402, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_160 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_80, 0), kwargs = {}) | |
%getitem_161 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_80, 1), kwargs = {}) | |
%add_251 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_160, 1e-06), kwargs = {}) | |
%rsqrt_80 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_251,), kwargs = {}) | |
%sub_80 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_402, %getitem_161), kwargs = {}) | |
%mul_174 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_80, %rsqrt_80), kwargs = {}) | |
%view_403 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_174, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant502 : [#users=1] = get_attr[target=_param_constant502] | |
%unsqueeze_320 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant502, 0), kwargs = {}) | |
%unsqueeze_321 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_320, 2), kwargs = {}) | |
%unsqueeze_322 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_321, 3), kwargs = {}) | |
%_param_constant503 : [#users=1] = get_attr[target=_param_constant503] | |
%unsqueeze_323 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant503, 0), kwargs = {}) | |
%unsqueeze_324 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_323, 2), kwargs = {}) | |
%unsqueeze_325 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_324, 3), kwargs = {}) | |
%mul_175 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_403, %unsqueeze_325), kwargs = {}) | |
%add_252 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_175, %unsqueeze_322), kwargs = {}) | |
%squeeze_188 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_161, 3), kwargs = {}) | |
%squeeze_189 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_188, 2), kwargs = {}) | |
%squeeze_190 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_80, 3), kwargs = {}) | |
%squeeze_191 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_190, 2), kwargs = {}) | |
%detach_116 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_189,), kwargs = {}) | |
%detach_117 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_191,), kwargs = {}) | |
%permute_110 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_252, [0, 2, 3, 1]), kwargs = {}) | |
%view_404 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_110, [2, 1024, 640]), kwargs = {}) | |
%_param_constant504 : [#users=1] = get_attr[target=_param_constant504] | |
%t_152 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant504,), kwargs = {}) | |
%expand_23 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_404, [2, 1024, 640]), kwargs = {}) | |
%view_405 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_23, [2, 1024, 640]), kwargs = {}) | |
%expand_24 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_152, [2, 640, 640]), kwargs = {}) | |
%view_406 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_24, [2, 640, 640]), kwargs = {}) | |
%bmm_33 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_405, %view_406), kwargs = {}) | |
%_unsafe_view_165 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_33, [2, 1024, 640]), kwargs = {}) | |
%_param_constant505 : [#users=1] = get_attr[target=_param_constant505] | |
%add_253 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_165, %_param_constant505), kwargs = {}) | |
%var_mean_81 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_253, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_162 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_81, 0), kwargs = {}) | |
%getitem_163 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_81, 1), kwargs = {}) | |
%add_254 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_162, 1e-05), kwargs = {}) | |
%rsqrt_81 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_254,), kwargs = {}) | |
%sub_81 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_253, %getitem_163), kwargs = {}) | |
%mul_176 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_81, %rsqrt_81), kwargs = {}) | |
%_param_constant506 : [#users=1] = get_attr[target=_param_constant506] | |
%mul_177 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_176, %_param_constant506), kwargs = {}) | |
%_param_constant507 : [#users=1] = get_attr[target=_param_constant507] | |
%add_255 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_177, %_param_constant507), kwargs = {}) | |
%_param_constant508 : [#users=1] = get_attr[target=_param_constant508] | |
%t_153 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant508,), kwargs = {}) | |
%view_407 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_255, [2048, 640]), kwargs = {}) | |
%mm_66 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_407, %t_153), kwargs = {}) | |
%_unsafe_view_166 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_66, [2, 1024, 640]), kwargs = {}) | |
%view_408 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_166, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_111 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_408, [0, 2, 1, 3]), kwargs = {}) | |
%clone_99 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_111,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_167 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_99, [20, 1024, 64]), kwargs = {}) | |
%_param_constant509 : [#users=1] = get_attr[target=_param_constant509] | |
%t_154 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant509,), kwargs = {}) | |
%view_409 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_255, [2048, 640]), kwargs = {}) | |
%mm_67 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_409, %t_154), kwargs = {}) | |
%_unsafe_view_168 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_67, [2, 1024, 640]), kwargs = {}) | |
%_param_constant510 : [#users=1] = get_attr[target=_param_constant510] | |
%t_155 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant510,), kwargs = {}) | |
%view_410 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_255, [2048, 640]), kwargs = {}) | |
%mm_68 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_410, %t_155), kwargs = {}) | |
%_unsafe_view_169 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_68, [2, 1024, 640]), kwargs = {}) | |
%view_411 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_168, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_112 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_411, [0, 2, 1, 3]), kwargs = {}) | |
%clone_100 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_112,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_170 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_100, [20, 1024, 64]), kwargs = {}) | |
%view_412 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_169, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_113 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_412, [0, 2, 1, 3]), kwargs = {}) | |
%clone_101 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_113,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_171 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_101, [20, 1024, 64]), kwargs = {}) | |
%empty_22 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([20, 1024, 1024],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_22 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_170, -1, -2), kwargs = {}) | |
%baddbmm_22 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_22, %_unsafe_view_167, %transpose_22), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_22 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_22, -1, False), kwargs = {}) | |
%detach_118 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_22,), kwargs = {}) | |
%bmm_34 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_22, %_unsafe_view_171), kwargs = {}) | |
%view_413 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_34, [2, 10, 1024, 64]), kwargs = {}) | |
%permute_114 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_413, [0, 2, 1, 3]), kwargs = {}) | |
%clone_102 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_114,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_172 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_102, [2, 1024, 640]), kwargs = {}) | |
%_param_constant511 : [#users=1] = get_attr[target=_param_constant511] | |
%t_156 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant511,), kwargs = {}) | |
%view_414 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_172, [2048, 640]), kwargs = {}) | |
%_param_constant512 : [#users=1] = get_attr[target=_param_constant512] | |
%addmm_75 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant512, %view_414, %t_156), kwargs = {}) | |
%view_415 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_75, [2, 1024, 640]), kwargs = {}) | |
%add_256 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_415, %add_253), kwargs = {}) | |
%var_mean_82 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_256, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_164 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_82, 0), kwargs = {}) | |
%getitem_165 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_82, 1), kwargs = {}) | |
%add_257 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_164, 1e-05), kwargs = {}) | |
%rsqrt_82 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_257,), kwargs = {}) | |
%sub_82 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_256, %getitem_165), kwargs = {}) | |
%mul_178 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_82, %rsqrt_82), kwargs = {}) | |
%_param_constant513 : [#users=1] = get_attr[target=_param_constant513] | |
%mul_179 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_178, %_param_constant513), kwargs = {}) | |
%_param_constant514 : [#users=1] = get_attr[target=_param_constant514] | |
%add_258 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_179, %_param_constant514), kwargs = {}) | |
%_param_constant515 : [#users=1] = get_attr[target=_param_constant515] | |
%t_157 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant515,), kwargs = {}) | |
%view_416 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_258, [2048, 640]), kwargs = {}) | |
%mm_69 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_416, %t_157), kwargs = {}) | |
%_unsafe_view_173 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_69, [2, 1024, 640]), kwargs = {}) | |
%view_417 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_173, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_115 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_417, [0, 2, 1, 3]), kwargs = {}) | |
%clone_103 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_115,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_174 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_103, [20, 1024, 64]), kwargs = {}) | |
%_param_constant516 : [#users=1] = get_attr[target=_param_constant516] | |
%t_158 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant516,), kwargs = {}) | |
%view_418 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_70 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_418, %t_158), kwargs = {}) | |
%_unsafe_view_175 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_70, [2, 64, 640]), kwargs = {}) | |
%_param_constant517 : [#users=1] = get_attr[target=_param_constant517] | |
%t_159 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant517,), kwargs = {}) | |
%view_419 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_71 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_419, %t_159), kwargs = {}) | |
%_unsafe_view_176 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_71, [2, 64, 640]), kwargs = {}) | |
%view_420 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_175, [2, 64, 10, 64]), kwargs = {}) | |
%permute_116 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_420, [0, 2, 1, 3]), kwargs = {}) | |
%clone_104 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_116,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_177 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_104, [20, 64, 64]), kwargs = {}) | |
%view_421 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_176, [2, 64, 10, 64]), kwargs = {}) | |
%permute_117 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_421, [0, 2, 1, 3]), kwargs = {}) | |
%clone_105 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_117,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_178 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_105, [20, 64, 64]), kwargs = {}) | |
%empty_23 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([20, 1024, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_23 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_177, -1, -2), kwargs = {}) | |
%baddbmm_23 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_23, %_unsafe_view_174, %transpose_23), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_23 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_23, -1, False), kwargs = {}) | |
%detach_119 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_23,), kwargs = {}) | |
%bmm_35 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_23, %_unsafe_view_178), kwargs = {}) | |
%view_422 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_35, [2, 10, 1024, 64]), kwargs = {}) | |
%permute_118 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_422, [0, 2, 1, 3]), kwargs = {}) | |
%clone_106 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_118,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_179 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_106, [2, 1024, 640]), kwargs = {}) | |
%_param_constant518 : [#users=1] = get_attr[target=_param_constant518] | |
%t_160 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant518,), kwargs = {}) | |
%view_423 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_179, [2048, 640]), kwargs = {}) | |
%_param_constant519 : [#users=1] = get_attr[target=_param_constant519] | |
%addmm_76 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant519, %view_423, %t_160), kwargs = {}) | |
%view_424 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_76, [2, 1024, 640]), kwargs = {}) | |
%add_259 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_424, %add_256), kwargs = {}) | |
%var_mean_83 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_259, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_166 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_83, 0), kwargs = {}) | |
%getitem_167 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_83, 1), kwargs = {}) | |
%add_260 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_166, 1e-05), kwargs = {}) | |
%rsqrt_83 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_260,), kwargs = {}) | |
%sub_83 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_259, %getitem_167), kwargs = {}) | |
%mul_180 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_83, %rsqrt_83), kwargs = {}) | |
%_param_constant520 : [#users=1] = get_attr[target=_param_constant520] | |
%mul_181 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_180, %_param_constant520), kwargs = {}) | |
%_param_constant521 : [#users=1] = get_attr[target=_param_constant521] | |
%add_261 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_181, %_param_constant521), kwargs = {}) | |
%_param_constant522 : [#users=1] = get_attr[target=_param_constant522] | |
%t_161 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant522,), kwargs = {}) | |
%view_425 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_261, [2048, 640]), kwargs = {}) | |
%_param_constant523 : [#users=1] = get_attr[target=_param_constant523] | |
%addmm_77 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant523, %view_425, %t_161), kwargs = {}) | |
%view_426 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_77, [2, 1024, 5120]), kwargs = {}) | |
%slice_65 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_426, -1, 0, 2560), kwargs = {}) | |
%slice_66 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_426, -1, 2560, 5120), kwargs = {}) | |
%gelu_11 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_66,), kwargs = {}) | |
%mul_182 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_65, %gelu_11), kwargs = {}) | |
%_param_constant524 : [#users=1] = get_attr[target=_param_constant524] | |
%t_162 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant524,), kwargs = {}) | |
%view_427 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_182, [2048, 2560]), kwargs = {}) | |
%_param_constant525 : [#users=1] = get_attr[target=_param_constant525] | |
%addmm_78 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant525, %view_427, %t_162), kwargs = {}) | |
%view_428 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_78, [2, 1024, 640]), kwargs = {}) | |
%add_262 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_428, %add_259), kwargs = {}) | |
%_param_constant526 : [#users=1] = get_attr[target=_param_constant526] | |
%t_163 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant526,), kwargs = {}) | |
%view_429 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_262, [2048, 640]), kwargs = {}) | |
%_param_constant527 : [#users=1] = get_attr[target=_param_constant527] | |
%addmm_79 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant527, %view_429, %t_163), kwargs = {}) | |
%view_430 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_79, [2, 1024, 640]), kwargs = {}) | |
%view_431 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_430, [2, 32, 32, 640]), kwargs = {}) | |
%permute_119 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_431, [0, 3, 1, 2]), kwargs = {}) | |
%clone_107 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_119,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_263 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%clone_107, %div_18), kwargs = {}) | |
%cat_11 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%add_263, %convolution_5], 1), kwargs = {}) | |
%view_432 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_11, [2, 32, 30, 1024]), kwargs = {}) | |
%var_mean_84 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_432, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_168 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_84, 0), kwargs = {}) | |
%getitem_169 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_84, 1), kwargs = {}) | |
%add_264 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_168, 1e-05), kwargs = {}) | |
%rsqrt_84 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_264,), kwargs = {}) | |
%sub_84 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_432, %getitem_169), kwargs = {}) | |
%mul_183 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_84, %rsqrt_84), kwargs = {}) | |
%view_433 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_183, [2, 960, 32, 32]), kwargs = {}) | |
%_param_constant528 : [#users=1] = get_attr[target=_param_constant528] | |
%unsqueeze_326 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant528, 0), kwargs = {}) | |
%unsqueeze_327 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_326, 2), kwargs = {}) | |
%unsqueeze_328 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_327, 3), kwargs = {}) | |
%_param_constant529 : [#users=1] = get_attr[target=_param_constant529] | |
%unsqueeze_329 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant529, 0), kwargs = {}) | |
%unsqueeze_330 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_329, 2), kwargs = {}) | |
%unsqueeze_331 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_330, 3), kwargs = {}) | |
%mul_184 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_433, %unsqueeze_331), kwargs = {}) | |
%add_265 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_184, %unsqueeze_328), kwargs = {}) | |
%squeeze_192 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_169, 3), kwargs = {}) | |
%squeeze_193 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_192, 2), kwargs = {}) | |
%squeeze_194 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_84, 3), kwargs = {}) | |
%squeeze_195 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_194, 2), kwargs = {}) | |
%detach_120 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_193,), kwargs = {}) | |
%detach_121 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_195,), kwargs = {}) | |
%silu_55 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_265,), kwargs = {}) | |
%_param_constant530 : [#users=1] = get_attr[target=_param_constant530] | |
%_param_constant531 : [#users=1] = get_attr[target=_param_constant531] | |
%convolution_52 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_55, %_param_constant530, %_param_constant531, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_56 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant532 : [#users=1] = get_attr[target=_param_constant532] | |
%t_164 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant532,), kwargs = {}) | |
%_param_constant533 : [#users=1] = get_attr[target=_param_constant533] | |
%addmm_80 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant533, %silu_56, %t_164), kwargs = {}) | |
%slice_67 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_80, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_68 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_67, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_332 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_68, 2), kwargs = {}) | |
%unsqueeze_333 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_332, 3), kwargs = {}) | |
%add_266 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_52, %unsqueeze_333), kwargs = {}) | |
%view_434 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_266, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_85 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_434, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_170 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_85, 0), kwargs = {}) | |
%getitem_171 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_85, 1), kwargs = {}) | |
%add_267 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_170, 1e-05), kwargs = {}) | |
%rsqrt_85 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_267,), kwargs = {}) | |
%sub_85 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_434, %getitem_171), kwargs = {}) | |
%mul_185 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_85, %rsqrt_85), kwargs = {}) | |
%view_435 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_185, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant534 : [#users=1] = get_attr[target=_param_constant534] | |
%unsqueeze_334 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant534, 0), kwargs = {}) | |
%unsqueeze_335 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_334, 2), kwargs = {}) | |
%unsqueeze_336 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_335, 3), kwargs = {}) | |
%_param_constant535 : [#users=1] = get_attr[target=_param_constant535] | |
%unsqueeze_337 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant535, 0), kwargs = {}) | |
%unsqueeze_338 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_337, 2), kwargs = {}) | |
%unsqueeze_339 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_338, 3), kwargs = {}) | |
%mul_186 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_435, %unsqueeze_339), kwargs = {}) | |
%add_268 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_186, %unsqueeze_336), kwargs = {}) | |
%squeeze_196 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_171, 3), kwargs = {}) | |
%squeeze_197 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_196, 2), kwargs = {}) | |
%squeeze_198 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_85, 3), kwargs = {}) | |
%squeeze_199 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_198, 2), kwargs = {}) | |
%detach_122 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_197,), kwargs = {}) | |
%detach_123 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_199,), kwargs = {}) | |
%silu_57 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_268,), kwargs = {}) | |
%_param_constant536 : [#users=1] = get_attr[target=_param_constant536] | |
%_param_constant537 : [#users=1] = get_attr[target=_param_constant537] | |
%convolution_53 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_57, %_param_constant536, %_param_constant537, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant538 : [#users=1] = get_attr[target=_param_constant538] | |
%_param_constant539 : [#users=1] = get_attr[target=_param_constant539] | |
%convolution_54 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_11, %_param_constant538, %_param_constant539, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_269 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_54, %convolution_53), kwargs = {}) | |
%div_19 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_269, 1.0), kwargs = {}) | |
%view_436 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_19, [2, 32, 20, 1024]), kwargs = {}) | |
%var_mean_86 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_436, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_172 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_86, 0), kwargs = {}) | |
%getitem_173 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_86, 1), kwargs = {}) | |
%add_270 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_172, 1e-06), kwargs = {}) | |
%rsqrt_86 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_270,), kwargs = {}) | |
%sub_86 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_436, %getitem_173), kwargs = {}) | |
%mul_187 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_86, %rsqrt_86), kwargs = {}) | |
%view_437 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_187, [2, 640, 32, 32]), kwargs = {}) | |
%_param_constant540 : [#users=1] = get_attr[target=_param_constant540] | |
%unsqueeze_340 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant540, 0), kwargs = {}) | |
%unsqueeze_341 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_340, 2), kwargs = {}) | |
%unsqueeze_342 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_341, 3), kwargs = {}) | |
%_param_constant541 : [#users=1] = get_attr[target=_param_constant541] | |
%unsqueeze_343 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant541, 0), kwargs = {}) | |
%unsqueeze_344 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_343, 2), kwargs = {}) | |
%unsqueeze_345 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_344, 3), kwargs = {}) | |
%mul_188 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_437, %unsqueeze_345), kwargs = {}) | |
%add_271 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_188, %unsqueeze_342), kwargs = {}) | |
%squeeze_200 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_173, 3), kwargs = {}) | |
%squeeze_201 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_200, 2), kwargs = {}) | |
%squeeze_202 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_86, 3), kwargs = {}) | |
%squeeze_203 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_202, 2), kwargs = {}) | |
%detach_124 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_201,), kwargs = {}) | |
%detach_125 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_203,), kwargs = {}) | |
%permute_120 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_271, [0, 2, 3, 1]), kwargs = {}) | |
%view_438 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_120, [2, 1024, 640]), kwargs = {}) | |
%_param_constant542 : [#users=1] = get_attr[target=_param_constant542] | |
%t_165 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant542,), kwargs = {}) | |
%expand_25 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_438, [2, 1024, 640]), kwargs = {}) | |
%view_439 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_25, [2, 1024, 640]), kwargs = {}) | |
%expand_26 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_165, [2, 640, 640]), kwargs = {}) | |
%view_440 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_26, [2, 640, 640]), kwargs = {}) | |
%bmm_36 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_439, %view_440), kwargs = {}) | |
%_unsafe_view_180 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_36, [2, 1024, 640]), kwargs = {}) | |
%_param_constant543 : [#users=1] = get_attr[target=_param_constant543] | |
%add_272 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_180, %_param_constant543), kwargs = {}) | |
%var_mean_87 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_272, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_174 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_87, 0), kwargs = {}) | |
%getitem_175 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_87, 1), kwargs = {}) | |
%add_273 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_174, 1e-05), kwargs = {}) | |
%rsqrt_87 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_273,), kwargs = {}) | |
%sub_87 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_272, %getitem_175), kwargs = {}) | |
%mul_189 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_87, %rsqrt_87), kwargs = {}) | |
%_param_constant544 : [#users=1] = get_attr[target=_param_constant544] | |
%mul_190 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_189, %_param_constant544), kwargs = {}) | |
%_param_constant545 : [#users=1] = get_attr[target=_param_constant545] | |
%add_274 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_190, %_param_constant545), kwargs = {}) | |
%_param_constant546 : [#users=1] = get_attr[target=_param_constant546] | |
%t_166 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant546,), kwargs = {}) | |
%view_441 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_274, [2048, 640]), kwargs = {}) | |
%mm_72 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_441, %t_166), kwargs = {}) | |
%_unsafe_view_181 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_72, [2, 1024, 640]), kwargs = {}) | |
%view_442 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_181, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_121 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_442, [0, 2, 1, 3]), kwargs = {}) | |
%clone_108 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_121,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_182 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_108, [20, 1024, 64]), kwargs = {}) | |
%_param_constant547 : [#users=1] = get_attr[target=_param_constant547] | |
%t_167 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant547,), kwargs = {}) | |
%view_443 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_274, [2048, 640]), kwargs = {}) | |
%mm_73 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_443, %t_167), kwargs = {}) | |
%_unsafe_view_183 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_73, [2, 1024, 640]), kwargs = {}) | |
%_param_constant548 : [#users=1] = get_attr[target=_param_constant548] | |
%t_168 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant548,), kwargs = {}) | |
%view_444 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_274, [2048, 640]), kwargs = {}) | |
%mm_74 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_444, %t_168), kwargs = {}) | |
%_unsafe_view_184 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_74, [2, 1024, 640]), kwargs = {}) | |
%view_445 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_183, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_122 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_445, [0, 2, 1, 3]), kwargs = {}) | |
%clone_109 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_122,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_185 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_109, [20, 1024, 64]), kwargs = {}) | |
%view_446 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_184, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_123 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_446, [0, 2, 1, 3]), kwargs = {}) | |
%clone_110 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_123,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_186 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_110, [20, 1024, 64]), kwargs = {}) | |
%empty_24 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([20, 1024, 1024],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_24 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_185, -1, -2), kwargs = {}) | |
%baddbmm_24 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_24, %_unsafe_view_182, %transpose_24), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_24 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_24, -1, False), kwargs = {}) | |
%detach_126 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_24,), kwargs = {}) | |
%bmm_37 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_24, %_unsafe_view_186), kwargs = {}) | |
%view_447 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_37, [2, 10, 1024, 64]), kwargs = {}) | |
%permute_124 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_447, [0, 2, 1, 3]), kwargs = {}) | |
%clone_111 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_124,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_187 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_111, [2, 1024, 640]), kwargs = {}) | |
%_param_constant549 : [#users=1] = get_attr[target=_param_constant549] | |
%t_169 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant549,), kwargs = {}) | |
%view_448 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_187, [2048, 640]), kwargs = {}) | |
%_param_constant550 : [#users=1] = get_attr[target=_param_constant550] | |
%addmm_81 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant550, %view_448, %t_169), kwargs = {}) | |
%view_449 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_81, [2, 1024, 640]), kwargs = {}) | |
%add_275 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_449, %add_272), kwargs = {}) | |
%var_mean_88 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_275, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_176 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_88, 0), kwargs = {}) | |
%getitem_177 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_88, 1), kwargs = {}) | |
%add_276 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_176, 1e-05), kwargs = {}) | |
%rsqrt_88 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_276,), kwargs = {}) | |
%sub_88 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_275, %getitem_177), kwargs = {}) | |
%mul_191 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_88, %rsqrt_88), kwargs = {}) | |
%_param_constant551 : [#users=1] = get_attr[target=_param_constant551] | |
%mul_192 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_191, %_param_constant551), kwargs = {}) | |
%_param_constant552 : [#users=1] = get_attr[target=_param_constant552] | |
%add_277 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_192, %_param_constant552), kwargs = {}) | |
%_param_constant553 : [#users=1] = get_attr[target=_param_constant553] | |
%t_170 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant553,), kwargs = {}) | |
%view_450 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_277, [2048, 640]), kwargs = {}) | |
%mm_75 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_450, %t_170), kwargs = {}) | |
%_unsafe_view_188 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_75, [2, 1024, 640]), kwargs = {}) | |
%view_451 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_188, [2, 1024, 10, 64]), kwargs = {}) | |
%permute_125 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_451, [0, 2, 1, 3]), kwargs = {}) | |
%clone_112 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_125,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_189 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_112, [20, 1024, 64]), kwargs = {}) | |
%_param_constant554 : [#users=1] = get_attr[target=_param_constant554] | |
%t_171 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant554,), kwargs = {}) | |
%view_452 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_76 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_452, %t_171), kwargs = {}) | |
%_unsafe_view_190 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_76, [2, 64, 640]), kwargs = {}) | |
%_param_constant555 : [#users=1] = get_attr[target=_param_constant555] | |
%t_172 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant555,), kwargs = {}) | |
%view_453 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_77 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_453, %t_172), kwargs = {}) | |
%_unsafe_view_191 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_77, [2, 64, 640]), kwargs = {}) | |
%view_454 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_190, [2, 64, 10, 64]), kwargs = {}) | |
%permute_126 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_454, [0, 2, 1, 3]), kwargs = {}) | |
%clone_113 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_126,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_192 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_113, [20, 64, 64]), kwargs = {}) | |
%view_455 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_191, [2, 64, 10, 64]), kwargs = {}) | |
%permute_127 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_455, [0, 2, 1, 3]), kwargs = {}) | |
%clone_114 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_127,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_193 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_114, [20, 64, 64]), kwargs = {}) | |
%empty_25 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([20, 1024, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_25 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_192, -1, -2), kwargs = {}) | |
%baddbmm_25 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_25, %_unsafe_view_189, %transpose_25), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_25 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_25, -1, False), kwargs = {}) | |
%detach_127 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_25,), kwargs = {}) | |
%bmm_38 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_25, %_unsafe_view_193), kwargs = {}) | |
%view_456 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_38, [2, 10, 1024, 64]), kwargs = {}) | |
%permute_128 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_456, [0, 2, 1, 3]), kwargs = {}) | |
%clone_115 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_128,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_194 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_115, [2, 1024, 640]), kwargs = {}) | |
%_param_constant556 : [#users=1] = get_attr[target=_param_constant556] | |
%t_173 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant556,), kwargs = {}) | |
%view_457 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_194, [2048, 640]), kwargs = {}) | |
%_param_constant557 : [#users=1] = get_attr[target=_param_constant557] | |
%addmm_82 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant557, %view_457, %t_173), kwargs = {}) | |
%view_458 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_82, [2, 1024, 640]), kwargs = {}) | |
%add_278 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_458, %add_275), kwargs = {}) | |
%var_mean_89 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_278, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_178 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_89, 0), kwargs = {}) | |
%getitem_179 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_89, 1), kwargs = {}) | |
%add_279 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_178, 1e-05), kwargs = {}) | |
%rsqrt_89 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_279,), kwargs = {}) | |
%sub_89 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_278, %getitem_179), kwargs = {}) | |
%mul_193 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_89, %rsqrt_89), kwargs = {}) | |
%_param_constant558 : [#users=1] = get_attr[target=_param_constant558] | |
%mul_194 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_193, %_param_constant558), kwargs = {}) | |
%_param_constant559 : [#users=1] = get_attr[target=_param_constant559] | |
%add_280 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_194, %_param_constant559), kwargs = {}) | |
%_param_constant560 : [#users=1] = get_attr[target=_param_constant560] | |
%t_174 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant560,), kwargs = {}) | |
%view_459 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_280, [2048, 640]), kwargs = {}) | |
%_param_constant561 : [#users=1] = get_attr[target=_param_constant561] | |
%addmm_83 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant561, %view_459, %t_174), kwargs = {}) | |
%view_460 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_83, [2, 1024, 5120]), kwargs = {}) | |
%slice_69 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_460, -1, 0, 2560), kwargs = {}) | |
%slice_70 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_460, -1, 2560, 5120), kwargs = {}) | |
%gelu_12 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_70,), kwargs = {}) | |
%mul_195 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_69, %gelu_12), kwargs = {}) | |
%_param_constant562 : [#users=1] = get_attr[target=_param_constant562] | |
%t_175 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant562,), kwargs = {}) | |
%view_461 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_195, [2048, 2560]), kwargs = {}) | |
%_param_constant563 : [#users=1] = get_attr[target=_param_constant563] | |
%addmm_84 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant563, %view_461, %t_175), kwargs = {}) | |
%view_462 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_84, [2, 1024, 640]), kwargs = {}) | |
%add_281 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_462, %add_278), kwargs = {}) | |
%_param_constant564 : [#users=1] = get_attr[target=_param_constant564] | |
%t_176 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant564,), kwargs = {}) | |
%view_463 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_281, [2048, 640]), kwargs = {}) | |
%_param_constant565 : [#users=1] = get_attr[target=_param_constant565] | |
%addmm_85 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant565, %view_463, %t_176), kwargs = {}) | |
%view_464 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_85, [2, 1024, 640]), kwargs = {}) | |
%view_465 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_464, [2, 32, 32, 640]), kwargs = {}) | |
%permute_129 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_465, [0, 3, 1, 2]), kwargs = {}) | |
%clone_116 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_129,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_282 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%clone_116, %div_19), kwargs = {}) | |
%upsample_nearest2d_2 : [#users=1] = call_function[target=torch.ops.aten.upsample_nearest2d](args = (%add_282, [64, 64], 2.0, 2.0), kwargs = {}) | |
%_param_constant566 : [#users=1] = get_attr[target=_param_constant566] | |
%_param_constant567 : [#users=1] = get_attr[target=_param_constant567] | |
%convolution_55 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%upsample_nearest2d_2, %_param_constant566, %_param_constant567, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%cat_12 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%convolution_55, %add_37], 1), kwargs = {}) | |
%view_466 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_12, [2, 32, 30, 4096]), kwargs = {}) | |
%var_mean_90 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_466, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_180 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_90, 0), kwargs = {}) | |
%getitem_181 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_90, 1), kwargs = {}) | |
%add_283 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_180, 1e-05), kwargs = {}) | |
%rsqrt_90 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_283,), kwargs = {}) | |
%sub_90 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_466, %getitem_181), kwargs = {}) | |
%mul_196 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_90, %rsqrt_90), kwargs = {}) | |
%view_467 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_196, [2, 960, 64, 64]), kwargs = {}) | |
%_param_constant568 : [#users=1] = get_attr[target=_param_constant568] | |
%unsqueeze_346 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant568, 0), kwargs = {}) | |
%unsqueeze_347 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_346, 2), kwargs = {}) | |
%unsqueeze_348 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_347, 3), kwargs = {}) | |
%_param_constant569 : [#users=1] = get_attr[target=_param_constant569] | |
%unsqueeze_349 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant569, 0), kwargs = {}) | |
%unsqueeze_350 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_349, 2), kwargs = {}) | |
%unsqueeze_351 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_350, 3), kwargs = {}) | |
%mul_197 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_467, %unsqueeze_351), kwargs = {}) | |
%add_284 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_197, %unsqueeze_348), kwargs = {}) | |
%squeeze_204 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_181, 3), kwargs = {}) | |
%squeeze_205 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_204, 2), kwargs = {}) | |
%squeeze_206 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_90, 3), kwargs = {}) | |
%squeeze_207 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_206, 2), kwargs = {}) | |
%detach_128 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_205,), kwargs = {}) | |
%detach_129 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_207,), kwargs = {}) | |
%silu_58 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_284,), kwargs = {}) | |
%_param_constant570 : [#users=1] = get_attr[target=_param_constant570] | |
%_param_constant571 : [#users=1] = get_attr[target=_param_constant571] | |
%convolution_56 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_58, %_param_constant570, %_param_constant571, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_59 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant572 : [#users=1] = get_attr[target=_param_constant572] | |
%t_177 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant572,), kwargs = {}) | |
%_param_constant573 : [#users=1] = get_attr[target=_param_constant573] | |
%addmm_86 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant573, %silu_59, %t_177), kwargs = {}) | |
%slice_71 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_86, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_72 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_71, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_352 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_72, 2), kwargs = {}) | |
%unsqueeze_353 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_352, 3), kwargs = {}) | |
%add_285 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_56, %unsqueeze_353), kwargs = {}) | |
%view_468 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_285, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_91 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_468, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_182 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_91, 0), kwargs = {}) | |
%getitem_183 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_91, 1), kwargs = {}) | |
%add_286 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_182, 1e-05), kwargs = {}) | |
%rsqrt_91 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_286,), kwargs = {}) | |
%sub_91 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_468, %getitem_183), kwargs = {}) | |
%mul_198 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_91, %rsqrt_91), kwargs = {}) | |
%view_469 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_198, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant574 : [#users=1] = get_attr[target=_param_constant574] | |
%unsqueeze_354 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant574, 0), kwargs = {}) | |
%unsqueeze_355 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_354, 2), kwargs = {}) | |
%unsqueeze_356 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_355, 3), kwargs = {}) | |
%_param_constant575 : [#users=1] = get_attr[target=_param_constant575] | |
%unsqueeze_357 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant575, 0), kwargs = {}) | |
%unsqueeze_358 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_357, 2), kwargs = {}) | |
%unsqueeze_359 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_358, 3), kwargs = {}) | |
%mul_199 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_469, %unsqueeze_359), kwargs = {}) | |
%add_287 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_199, %unsqueeze_356), kwargs = {}) | |
%squeeze_208 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_183, 3), kwargs = {}) | |
%squeeze_209 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_208, 2), kwargs = {}) | |
%squeeze_210 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_91, 3), kwargs = {}) | |
%squeeze_211 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_210, 2), kwargs = {}) | |
%detach_130 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_209,), kwargs = {}) | |
%detach_131 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_211,), kwargs = {}) | |
%silu_60 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_287,), kwargs = {}) | |
%_param_constant576 : [#users=1] = get_attr[target=_param_constant576] | |
%_param_constant577 : [#users=1] = get_attr[target=_param_constant577] | |
%convolution_57 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_60, %_param_constant576, %_param_constant577, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant578 : [#users=1] = get_attr[target=_param_constant578] | |
%_param_constant579 : [#users=1] = get_attr[target=_param_constant579] | |
%convolution_58 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_12, %_param_constant578, %_param_constant579, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_288 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_58, %convolution_57), kwargs = {}) | |
%div_20 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_288, 1.0), kwargs = {}) | |
%view_470 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_20, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_92 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_470, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_184 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_92, 0), kwargs = {}) | |
%getitem_185 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_92, 1), kwargs = {}) | |
%add_289 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_184, 1e-06), kwargs = {}) | |
%rsqrt_92 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_289,), kwargs = {}) | |
%sub_92 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_470, %getitem_185), kwargs = {}) | |
%mul_200 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_92, %rsqrt_92), kwargs = {}) | |
%view_471 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_200, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant580 : [#users=1] = get_attr[target=_param_constant580] | |
%unsqueeze_360 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant580, 0), kwargs = {}) | |
%unsqueeze_361 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_360, 2), kwargs = {}) | |
%unsqueeze_362 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_361, 3), kwargs = {}) | |
%_param_constant581 : [#users=1] = get_attr[target=_param_constant581] | |
%unsqueeze_363 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant581, 0), kwargs = {}) | |
%unsqueeze_364 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_363, 2), kwargs = {}) | |
%unsqueeze_365 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_364, 3), kwargs = {}) | |
%mul_201 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_471, %unsqueeze_365), kwargs = {}) | |
%add_290 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_201, %unsqueeze_362), kwargs = {}) | |
%squeeze_212 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_185, 3), kwargs = {}) | |
%squeeze_213 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_212, 2), kwargs = {}) | |
%squeeze_214 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_92, 3), kwargs = {}) | |
%squeeze_215 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_214, 2), kwargs = {}) | |
%detach_132 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_213,), kwargs = {}) | |
%detach_133 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_215,), kwargs = {}) | |
%permute_130 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_290, [0, 2, 3, 1]), kwargs = {}) | |
%view_472 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_130, [2, 4096, 320]), kwargs = {}) | |
%_param_constant582 : [#users=1] = get_attr[target=_param_constant582] | |
%t_178 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant582,), kwargs = {}) | |
%expand_27 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_472, [2, 4096, 320]), kwargs = {}) | |
%view_473 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_27, [2, 4096, 320]), kwargs = {}) | |
%expand_28 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_178, [2, 320, 320]), kwargs = {}) | |
%view_474 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_28, [2, 320, 320]), kwargs = {}) | |
%bmm_39 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_473, %view_474), kwargs = {}) | |
%_unsafe_view_195 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_39, [2, 4096, 320]), kwargs = {}) | |
%_param_constant583 : [#users=1] = get_attr[target=_param_constant583] | |
%add_291 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_195, %_param_constant583), kwargs = {}) | |
%var_mean_93 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_291, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_186 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_93, 0), kwargs = {}) | |
%getitem_187 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_93, 1), kwargs = {}) | |
%add_292 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_186, 1e-05), kwargs = {}) | |
%rsqrt_93 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_292,), kwargs = {}) | |
%sub_93 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_291, %getitem_187), kwargs = {}) | |
%mul_202 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_93, %rsqrt_93), kwargs = {}) | |
%_param_constant584 : [#users=1] = get_attr[target=_param_constant584] | |
%mul_203 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_202, %_param_constant584), kwargs = {}) | |
%_param_constant585 : [#users=1] = get_attr[target=_param_constant585] | |
%add_293 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_203, %_param_constant585), kwargs = {}) | |
%_param_constant586 : [#users=1] = get_attr[target=_param_constant586] | |
%t_179 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant586,), kwargs = {}) | |
%view_475 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_293, [8192, 320]), kwargs = {}) | |
%mm_78 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_475, %t_179), kwargs = {}) | |
%_unsafe_view_196 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_78, [2, 4096, 320]), kwargs = {}) | |
%view_476 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_196, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_131 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_476, [0, 2, 1, 3]), kwargs = {}) | |
%clone_117 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_131,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_197 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_117, [10, 4096, 64]), kwargs = {}) | |
%_param_constant587 : [#users=1] = get_attr[target=_param_constant587] | |
%t_180 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant587,), kwargs = {}) | |
%view_477 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_293, [8192, 320]), kwargs = {}) | |
%mm_79 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_477, %t_180), kwargs = {}) | |
%_unsafe_view_198 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_79, [2, 4096, 320]), kwargs = {}) | |
%_param_constant588 : [#users=1] = get_attr[target=_param_constant588] | |
%t_181 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant588,), kwargs = {}) | |
%view_478 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_293, [8192, 320]), kwargs = {}) | |
%mm_80 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_478, %t_181), kwargs = {}) | |
%_unsafe_view_199 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_80, [2, 4096, 320]), kwargs = {}) | |
%view_479 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_198, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_132 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_479, [0, 2, 1, 3]), kwargs = {}) | |
%clone_118 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_132,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_200 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_118, [10, 4096, 64]), kwargs = {}) | |
%view_480 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_199, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_133 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_480, [0, 2, 1, 3]), kwargs = {}) | |
%clone_119 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_133,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_201 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_119, [10, 4096, 64]), kwargs = {}) | |
%empty_26 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([10, 4096, 4096],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_26 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_200, -1, -2), kwargs = {}) | |
%baddbmm_26 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_26, %_unsafe_view_197, %transpose_26), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_26 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_26, -1, False), kwargs = {}) | |
%detach_134 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_26,), kwargs = {}) | |
%bmm_40 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_26, %_unsafe_view_201), kwargs = {}) | |
%view_481 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_40, [2, 5, 4096, 64]), kwargs = {}) | |
%permute_134 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_481, [0, 2, 1, 3]), kwargs = {}) | |
%clone_120 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_134,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_202 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_120, [2, 4096, 320]), kwargs = {}) | |
%_param_constant589 : [#users=1] = get_attr[target=_param_constant589] | |
%t_182 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant589,), kwargs = {}) | |
%view_482 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_202, [8192, 320]), kwargs = {}) | |
%_param_constant590 : [#users=1] = get_attr[target=_param_constant590] | |
%addmm_87 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant590, %view_482, %t_182), kwargs = {}) | |
%view_483 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_87, [2, 4096, 320]), kwargs = {}) | |
%add_294 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_483, %add_291), kwargs = {}) | |
%var_mean_94 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_294, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_188 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_94, 0), kwargs = {}) | |
%getitem_189 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_94, 1), kwargs = {}) | |
%add_295 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_188, 1e-05), kwargs = {}) | |
%rsqrt_94 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_295,), kwargs = {}) | |
%sub_94 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_294, %getitem_189), kwargs = {}) | |
%mul_204 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_94, %rsqrt_94), kwargs = {}) | |
%_param_constant591 : [#users=1] = get_attr[target=_param_constant591] | |
%mul_205 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_204, %_param_constant591), kwargs = {}) | |
%_param_constant592 : [#users=1] = get_attr[target=_param_constant592] | |
%add_296 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_205, %_param_constant592), kwargs = {}) | |
%_param_constant593 : [#users=1] = get_attr[target=_param_constant593] | |
%t_183 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant593,), kwargs = {}) | |
%view_484 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_296, [8192, 320]), kwargs = {}) | |
%mm_81 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_484, %t_183), kwargs = {}) | |
%_unsafe_view_203 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_81, [2, 4096, 320]), kwargs = {}) | |
%view_485 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_203, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_135 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_485, [0, 2, 1, 3]), kwargs = {}) | |
%clone_121 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_135,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_204 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_121, [10, 4096, 64]), kwargs = {}) | |
%_param_constant594 : [#users=1] = get_attr[target=_param_constant594] | |
%t_184 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant594,), kwargs = {}) | |
%view_486 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_82 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_486, %t_184), kwargs = {}) | |
%_unsafe_view_205 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_82, [2, 64, 320]), kwargs = {}) | |
%_param_constant595 : [#users=1] = get_attr[target=_param_constant595] | |
%t_185 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant595,), kwargs = {}) | |
%view_487 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_83 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_487, %t_185), kwargs = {}) | |
%_unsafe_view_206 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_83, [2, 64, 320]), kwargs = {}) | |
%view_488 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_205, [2, 64, 5, 64]), kwargs = {}) | |
%permute_136 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_488, [0, 2, 1, 3]), kwargs = {}) | |
%clone_122 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_136,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_207 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_122, [10, 64, 64]), kwargs = {}) | |
%view_489 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_206, [2, 64, 5, 64]), kwargs = {}) | |
%permute_137 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_489, [0, 2, 1, 3]), kwargs = {}) | |
%clone_123 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_137,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_208 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_123, [10, 64, 64]), kwargs = {}) | |
%empty_27 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([10, 4096, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_27 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_207, -1, -2), kwargs = {}) | |
%baddbmm_27 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_27, %_unsafe_view_204, %transpose_27), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_27 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_27, -1, False), kwargs = {}) | |
%detach_135 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_27,), kwargs = {}) | |
%bmm_41 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_27, %_unsafe_view_208), kwargs = {}) | |
%view_490 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_41, [2, 5, 4096, 64]), kwargs = {}) | |
%permute_138 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_490, [0, 2, 1, 3]), kwargs = {}) | |
%clone_124 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_138,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_209 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_124, [2, 4096, 320]), kwargs = {}) | |
%_param_constant596 : [#users=1] = get_attr[target=_param_constant596] | |
%t_186 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant596,), kwargs = {}) | |
%view_491 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_209, [8192, 320]), kwargs = {}) | |
%_param_constant597 : [#users=1] = get_attr[target=_param_constant597] | |
%addmm_88 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant597, %view_491, %t_186), kwargs = {}) | |
%view_492 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_88, [2, 4096, 320]), kwargs = {}) | |
%add_297 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_492, %add_294), kwargs = {}) | |
%var_mean_95 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_297, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_190 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_95, 0), kwargs = {}) | |
%getitem_191 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_95, 1), kwargs = {}) | |
%add_298 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_190, 1e-05), kwargs = {}) | |
%rsqrt_95 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_298,), kwargs = {}) | |
%sub_95 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_297, %getitem_191), kwargs = {}) | |
%mul_206 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_95, %rsqrt_95), kwargs = {}) | |
%_param_constant598 : [#users=1] = get_attr[target=_param_constant598] | |
%mul_207 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_206, %_param_constant598), kwargs = {}) | |
%_param_constant599 : [#users=1] = get_attr[target=_param_constant599] | |
%add_299 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_207, %_param_constant599), kwargs = {}) | |
%_param_constant600 : [#users=1] = get_attr[target=_param_constant600] | |
%t_187 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant600,), kwargs = {}) | |
%view_493 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_299, [8192, 320]), kwargs = {}) | |
%_param_constant601 : [#users=1] = get_attr[target=_param_constant601] | |
%addmm_89 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant601, %view_493, %t_187), kwargs = {}) | |
%view_494 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_89, [2, 4096, 2560]), kwargs = {}) | |
%slice_73 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_494, -1, 0, 1280), kwargs = {}) | |
%slice_74 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_494, -1, 1280, 2560), kwargs = {}) | |
%gelu_13 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_74,), kwargs = {}) | |
%mul_208 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_73, %gelu_13), kwargs = {}) | |
%_param_constant602 : [#users=1] = get_attr[target=_param_constant602] | |
%t_188 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant602,), kwargs = {}) | |
%view_495 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_208, [8192, 1280]), kwargs = {}) | |
%_param_constant603 : [#users=1] = get_attr[target=_param_constant603] | |
%addmm_90 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant603, %view_495, %t_188), kwargs = {}) | |
%view_496 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_90, [2, 4096, 320]), kwargs = {}) | |
%add_300 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_496, %add_297), kwargs = {}) | |
%_param_constant604 : [#users=1] = get_attr[target=_param_constant604] | |
%t_189 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant604,), kwargs = {}) | |
%view_497 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_300, [8192, 320]), kwargs = {}) | |
%_param_constant605 : [#users=1] = get_attr[target=_param_constant605] | |
%addmm_91 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant605, %view_497, %t_189), kwargs = {}) | |
%view_498 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_91, [2, 4096, 320]), kwargs = {}) | |
%view_499 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_498, [2, 64, 64, 320]), kwargs = {}) | |
%permute_139 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_499, [0, 3, 1, 2]), kwargs = {}) | |
%clone_125 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_139,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_301 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%clone_125, %div_20), kwargs = {}) | |
%cat_13 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%add_301, %add_18], 1), kwargs = {}) | |
%view_500 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_13, [2, 32, 20, 4096]), kwargs = {}) | |
%var_mean_96 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_500, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_192 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_96, 0), kwargs = {}) | |
%getitem_193 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_96, 1), kwargs = {}) | |
%add_302 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_192, 1e-05), kwargs = {}) | |
%rsqrt_96 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_302,), kwargs = {}) | |
%sub_96 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_500, %getitem_193), kwargs = {}) | |
%mul_209 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_96, %rsqrt_96), kwargs = {}) | |
%view_501 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_209, [2, 640, 64, 64]), kwargs = {}) | |
%_param_constant606 : [#users=1] = get_attr[target=_param_constant606] | |
%unsqueeze_366 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant606, 0), kwargs = {}) | |
%unsqueeze_367 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_366, 2), kwargs = {}) | |
%unsqueeze_368 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_367, 3), kwargs = {}) | |
%_param_constant607 : [#users=1] = get_attr[target=_param_constant607] | |
%unsqueeze_369 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant607, 0), kwargs = {}) | |
%unsqueeze_370 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_369, 2), kwargs = {}) | |
%unsqueeze_371 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_370, 3), kwargs = {}) | |
%mul_210 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_501, %unsqueeze_371), kwargs = {}) | |
%add_303 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_210, %unsqueeze_368), kwargs = {}) | |
%squeeze_216 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_193, 3), kwargs = {}) | |
%squeeze_217 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_216, 2), kwargs = {}) | |
%squeeze_218 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_96, 3), kwargs = {}) | |
%squeeze_219 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_218, 2), kwargs = {}) | |
%detach_136 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_217,), kwargs = {}) | |
%detach_137 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_219,), kwargs = {}) | |
%silu_61 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_303,), kwargs = {}) | |
%_param_constant608 : [#users=1] = get_attr[target=_param_constant608] | |
%_param_constant609 : [#users=1] = get_attr[target=_param_constant609] | |
%convolution_59 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_61, %_param_constant608, %_param_constant609, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_62 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant610 : [#users=1] = get_attr[target=_param_constant610] | |
%t_190 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant610,), kwargs = {}) | |
%_param_constant611 : [#users=1] = get_attr[target=_param_constant611] | |
%addmm_92 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant611, %silu_62, %t_190), kwargs = {}) | |
%slice_75 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_92, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_76 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_75, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_372 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_76, 2), kwargs = {}) | |
%unsqueeze_373 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_372, 3), kwargs = {}) | |
%add_304 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_59, %unsqueeze_373), kwargs = {}) | |
%view_502 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_304, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_97 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_502, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_194 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_97, 0), kwargs = {}) | |
%getitem_195 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_97, 1), kwargs = {}) | |
%add_305 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_194, 1e-05), kwargs = {}) | |
%rsqrt_97 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_305,), kwargs = {}) | |
%sub_97 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_502, %getitem_195), kwargs = {}) | |
%mul_211 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_97, %rsqrt_97), kwargs = {}) | |
%view_503 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_211, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant612 : [#users=1] = get_attr[target=_param_constant612] | |
%unsqueeze_374 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant612, 0), kwargs = {}) | |
%unsqueeze_375 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_374, 2), kwargs = {}) | |
%unsqueeze_376 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_375, 3), kwargs = {}) | |
%_param_constant613 : [#users=1] = get_attr[target=_param_constant613] | |
%unsqueeze_377 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant613, 0), kwargs = {}) | |
%unsqueeze_378 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_377, 2), kwargs = {}) | |
%unsqueeze_379 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_378, 3), kwargs = {}) | |
%mul_212 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_503, %unsqueeze_379), kwargs = {}) | |
%add_306 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_212, %unsqueeze_376), kwargs = {}) | |
%squeeze_220 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_195, 3), kwargs = {}) | |
%squeeze_221 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_220, 2), kwargs = {}) | |
%squeeze_222 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_97, 3), kwargs = {}) | |
%squeeze_223 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_222, 2), kwargs = {}) | |
%detach_138 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_221,), kwargs = {}) | |
%detach_139 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_223,), kwargs = {}) | |
%silu_63 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_306,), kwargs = {}) | |
%_param_constant614 : [#users=1] = get_attr[target=_param_constant614] | |
%_param_constant615 : [#users=1] = get_attr[target=_param_constant615] | |
%convolution_60 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_63, %_param_constant614, %_param_constant615, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant616 : [#users=1] = get_attr[target=_param_constant616] | |
%_param_constant617 : [#users=1] = get_attr[target=_param_constant617] | |
%convolution_61 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_13, %_param_constant616, %_param_constant617, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_307 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_61, %convolution_60), kwargs = {}) | |
%div_21 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_307, 1.0), kwargs = {}) | |
%view_504 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_21, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_98 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_504, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_196 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_98, 0), kwargs = {}) | |
%getitem_197 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_98, 1), kwargs = {}) | |
%add_308 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_196, 1e-06), kwargs = {}) | |
%rsqrt_98 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_308,), kwargs = {}) | |
%sub_98 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_504, %getitem_197), kwargs = {}) | |
%mul_213 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_98, %rsqrt_98), kwargs = {}) | |
%view_505 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_213, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant618 : [#users=1] = get_attr[target=_param_constant618] | |
%unsqueeze_380 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant618, 0), kwargs = {}) | |
%unsqueeze_381 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_380, 2), kwargs = {}) | |
%unsqueeze_382 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_381, 3), kwargs = {}) | |
%_param_constant619 : [#users=1] = get_attr[target=_param_constant619] | |
%unsqueeze_383 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant619, 0), kwargs = {}) | |
%unsqueeze_384 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_383, 2), kwargs = {}) | |
%unsqueeze_385 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_384, 3), kwargs = {}) | |
%mul_214 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_505, %unsqueeze_385), kwargs = {}) | |
%add_309 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_214, %unsqueeze_382), kwargs = {}) | |
%squeeze_224 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_197, 3), kwargs = {}) | |
%squeeze_225 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_224, 2), kwargs = {}) | |
%squeeze_226 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_98, 3), kwargs = {}) | |
%squeeze_227 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_226, 2), kwargs = {}) | |
%detach_140 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_225,), kwargs = {}) | |
%detach_141 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_227,), kwargs = {}) | |
%permute_140 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_309, [0, 2, 3, 1]), kwargs = {}) | |
%view_506 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_140, [2, 4096, 320]), kwargs = {}) | |
%_param_constant620 : [#users=1] = get_attr[target=_param_constant620] | |
%t_191 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant620,), kwargs = {}) | |
%expand_29 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_506, [2, 4096, 320]), kwargs = {}) | |
%view_507 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_29, [2, 4096, 320]), kwargs = {}) | |
%expand_30 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_191, [2, 320, 320]), kwargs = {}) | |
%view_508 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_30, [2, 320, 320]), kwargs = {}) | |
%bmm_42 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_507, %view_508), kwargs = {}) | |
%_unsafe_view_210 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_42, [2, 4096, 320]), kwargs = {}) | |
%_param_constant621 : [#users=1] = get_attr[target=_param_constant621] | |
%add_310 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_210, %_param_constant621), kwargs = {}) | |
%var_mean_99 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_310, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_198 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_99, 0), kwargs = {}) | |
%getitem_199 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_99, 1), kwargs = {}) | |
%add_311 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_198, 1e-05), kwargs = {}) | |
%rsqrt_99 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_311,), kwargs = {}) | |
%sub_99 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_310, %getitem_199), kwargs = {}) | |
%mul_215 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_99, %rsqrt_99), kwargs = {}) | |
%_param_constant622 : [#users=1] = get_attr[target=_param_constant622] | |
%mul_216 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_215, %_param_constant622), kwargs = {}) | |
%_param_constant623 : [#users=1] = get_attr[target=_param_constant623] | |
%add_312 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_216, %_param_constant623), kwargs = {}) | |
%_param_constant624 : [#users=1] = get_attr[target=_param_constant624] | |
%t_192 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant624,), kwargs = {}) | |
%view_509 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_312, [8192, 320]), kwargs = {}) | |
%mm_84 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_509, %t_192), kwargs = {}) | |
%_unsafe_view_211 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_84, [2, 4096, 320]), kwargs = {}) | |
%view_510 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_211, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_141 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_510, [0, 2, 1, 3]), kwargs = {}) | |
%clone_126 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_141,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_212 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_126, [10, 4096, 64]), kwargs = {}) | |
%_param_constant625 : [#users=1] = get_attr[target=_param_constant625] | |
%t_193 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant625,), kwargs = {}) | |
%view_511 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_312, [8192, 320]), kwargs = {}) | |
%mm_85 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_511, %t_193), kwargs = {}) | |
%_unsafe_view_213 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_85, [2, 4096, 320]), kwargs = {}) | |
%_param_constant626 : [#users=1] = get_attr[target=_param_constant626] | |
%t_194 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant626,), kwargs = {}) | |
%view_512 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_312, [8192, 320]), kwargs = {}) | |
%mm_86 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_512, %t_194), kwargs = {}) | |
%_unsafe_view_214 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_86, [2, 4096, 320]), kwargs = {}) | |
%view_513 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_213, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_142 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_513, [0, 2, 1, 3]), kwargs = {}) | |
%clone_127 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_142,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_215 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_127, [10, 4096, 64]), kwargs = {}) | |
%view_514 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_214, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_143 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_514, [0, 2, 1, 3]), kwargs = {}) | |
%clone_128 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_143,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_216 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_128, [10, 4096, 64]), kwargs = {}) | |
%empty_28 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([10, 4096, 4096],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_28 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_215, -1, -2), kwargs = {}) | |
%baddbmm_28 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_28, %_unsafe_view_212, %transpose_28), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_28 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_28, -1, False), kwargs = {}) | |
%detach_142 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_28,), kwargs = {}) | |
%bmm_43 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_28, %_unsafe_view_216), kwargs = {}) | |
%view_515 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_43, [2, 5, 4096, 64]), kwargs = {}) | |
%permute_144 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_515, [0, 2, 1, 3]), kwargs = {}) | |
%clone_129 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_144,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_217 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_129, [2, 4096, 320]), kwargs = {}) | |
%_param_constant627 : [#users=1] = get_attr[target=_param_constant627] | |
%t_195 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant627,), kwargs = {}) | |
%view_516 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_217, [8192, 320]), kwargs = {}) | |
%_param_constant628 : [#users=1] = get_attr[target=_param_constant628] | |
%addmm_93 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant628, %view_516, %t_195), kwargs = {}) | |
%view_517 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_93, [2, 4096, 320]), kwargs = {}) | |
%add_313 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_517, %add_310), kwargs = {}) | |
%var_mean_100 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_313, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_200 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_100, 0), kwargs = {}) | |
%getitem_201 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_100, 1), kwargs = {}) | |
%add_314 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_200, 1e-05), kwargs = {}) | |
%rsqrt_100 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_314,), kwargs = {}) | |
%sub_100 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_313, %getitem_201), kwargs = {}) | |
%mul_217 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_100, %rsqrt_100), kwargs = {}) | |
%_param_constant629 : [#users=1] = get_attr[target=_param_constant629] | |
%mul_218 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_217, %_param_constant629), kwargs = {}) | |
%_param_constant630 : [#users=1] = get_attr[target=_param_constant630] | |
%add_315 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_218, %_param_constant630), kwargs = {}) | |
%_param_constant631 : [#users=1] = get_attr[target=_param_constant631] | |
%t_196 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant631,), kwargs = {}) | |
%view_518 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_315, [8192, 320]), kwargs = {}) | |
%mm_87 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_518, %t_196), kwargs = {}) | |
%_unsafe_view_218 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_87, [2, 4096, 320]), kwargs = {}) | |
%view_519 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_218, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_145 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_519, [0, 2, 1, 3]), kwargs = {}) | |
%clone_130 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_145,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_219 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_130, [10, 4096, 64]), kwargs = {}) | |
%_param_constant632 : [#users=1] = get_attr[target=_param_constant632] | |
%t_197 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant632,), kwargs = {}) | |
%view_520 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_88 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_520, %t_197), kwargs = {}) | |
%_unsafe_view_220 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_88, [2, 64, 320]), kwargs = {}) | |
%_param_constant633 : [#users=1] = get_attr[target=_param_constant633] | |
%t_198 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant633,), kwargs = {}) | |
%view_521 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_89 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_521, %t_198), kwargs = {}) | |
%_unsafe_view_221 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_89, [2, 64, 320]), kwargs = {}) | |
%view_522 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_220, [2, 64, 5, 64]), kwargs = {}) | |
%permute_146 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_522, [0, 2, 1, 3]), kwargs = {}) | |
%clone_131 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_146,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_222 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_131, [10, 64, 64]), kwargs = {}) | |
%view_523 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_221, [2, 64, 5, 64]), kwargs = {}) | |
%permute_147 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_523, [0, 2, 1, 3]), kwargs = {}) | |
%clone_132 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_147,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_223 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_132, [10, 64, 64]), kwargs = {}) | |
%empty_29 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([10, 4096, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_29 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_222, -1, -2), kwargs = {}) | |
%baddbmm_29 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_29, %_unsafe_view_219, %transpose_29), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_29 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_29, -1, False), kwargs = {}) | |
%detach_143 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_29,), kwargs = {}) | |
%bmm_44 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_29, %_unsafe_view_223), kwargs = {}) | |
%view_524 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_44, [2, 5, 4096, 64]), kwargs = {}) | |
%permute_148 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_524, [0, 2, 1, 3]), kwargs = {}) | |
%clone_133 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_148,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_224 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_133, [2, 4096, 320]), kwargs = {}) | |
%_param_constant634 : [#users=1] = get_attr[target=_param_constant634] | |
%t_199 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant634,), kwargs = {}) | |
%view_525 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_224, [8192, 320]), kwargs = {}) | |
%_param_constant635 : [#users=1] = get_attr[target=_param_constant635] | |
%addmm_94 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant635, %view_525, %t_199), kwargs = {}) | |
%view_526 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_94, [2, 4096, 320]), kwargs = {}) | |
%add_316 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_526, %add_313), kwargs = {}) | |
%var_mean_101 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_316, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_202 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_101, 0), kwargs = {}) | |
%getitem_203 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_101, 1), kwargs = {}) | |
%add_317 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_202, 1e-05), kwargs = {}) | |
%rsqrt_101 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_317,), kwargs = {}) | |
%sub_101 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_316, %getitem_203), kwargs = {}) | |
%mul_219 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_101, %rsqrt_101), kwargs = {}) | |
%_param_constant636 : [#users=1] = get_attr[target=_param_constant636] | |
%mul_220 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_219, %_param_constant636), kwargs = {}) | |
%_param_constant637 : [#users=1] = get_attr[target=_param_constant637] | |
%add_318 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_220, %_param_constant637), kwargs = {}) | |
%_param_constant638 : [#users=1] = get_attr[target=_param_constant638] | |
%t_200 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant638,), kwargs = {}) | |
%view_527 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_318, [8192, 320]), kwargs = {}) | |
%_param_constant639 : [#users=1] = get_attr[target=_param_constant639] | |
%addmm_95 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant639, %view_527, %t_200), kwargs = {}) | |
%view_528 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_95, [2, 4096, 2560]), kwargs = {}) | |
%slice_77 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_528, -1, 0, 1280), kwargs = {}) | |
%slice_78 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_528, -1, 1280, 2560), kwargs = {}) | |
%gelu_14 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_78,), kwargs = {}) | |
%mul_221 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_77, %gelu_14), kwargs = {}) | |
%_param_constant640 : [#users=1] = get_attr[target=_param_constant640] | |
%t_201 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant640,), kwargs = {}) | |
%view_529 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_221, [8192, 1280]), kwargs = {}) | |
%_param_constant641 : [#users=1] = get_attr[target=_param_constant641] | |
%addmm_96 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant641, %view_529, %t_201), kwargs = {}) | |
%view_530 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_96, [2, 4096, 320]), kwargs = {}) | |
%add_319 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_530, %add_316), kwargs = {}) | |
%_param_constant642 : [#users=1] = get_attr[target=_param_constant642] | |
%t_202 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant642,), kwargs = {}) | |
%view_531 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_319, [8192, 320]), kwargs = {}) | |
%_param_constant643 : [#users=1] = get_attr[target=_param_constant643] | |
%addmm_97 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant643, %view_531, %t_202), kwargs = {}) | |
%view_532 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_97, [2, 4096, 320]), kwargs = {}) | |
%view_533 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_532, [2, 64, 64, 320]), kwargs = {}) | |
%permute_149 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_533, [0, 3, 1, 2]), kwargs = {}) | |
%clone_134 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_149,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_320 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%clone_134, %div_21), kwargs = {}) | |
%cat_14 : [#users=2] = call_function[target=torch.ops.aten.cat](args = ([%add_320, %convolution], 1), kwargs = {}) | |
%view_534 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%cat_14, [2, 32, 20, 4096]), kwargs = {}) | |
%var_mean_102 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_534, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_204 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_102, 0), kwargs = {}) | |
%getitem_205 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_102, 1), kwargs = {}) | |
%add_321 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_204, 1e-05), kwargs = {}) | |
%rsqrt_102 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_321,), kwargs = {}) | |
%sub_102 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_534, %getitem_205), kwargs = {}) | |
%mul_222 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_102, %rsqrt_102), kwargs = {}) | |
%view_535 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_222, [2, 640, 64, 64]), kwargs = {}) | |
%_param_constant644 : [#users=1] = get_attr[target=_param_constant644] | |
%unsqueeze_386 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant644, 0), kwargs = {}) | |
%unsqueeze_387 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_386, 2), kwargs = {}) | |
%unsqueeze_388 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_387, 3), kwargs = {}) | |
%_param_constant645 : [#users=1] = get_attr[target=_param_constant645] | |
%unsqueeze_389 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant645, 0), kwargs = {}) | |
%unsqueeze_390 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_389, 2), kwargs = {}) | |
%unsqueeze_391 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_390, 3), kwargs = {}) | |
%mul_223 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_535, %unsqueeze_391), kwargs = {}) | |
%add_322 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_223, %unsqueeze_388), kwargs = {}) | |
%squeeze_228 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_205, 3), kwargs = {}) | |
%squeeze_229 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_228, 2), kwargs = {}) | |
%squeeze_230 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_102, 3), kwargs = {}) | |
%squeeze_231 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_230, 2), kwargs = {}) | |
%detach_144 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_229,), kwargs = {}) | |
%detach_145 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_231,), kwargs = {}) | |
%silu_64 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_322,), kwargs = {}) | |
%_param_constant646 : [#users=1] = get_attr[target=_param_constant646] | |
%_param_constant647 : [#users=1] = get_attr[target=_param_constant647] | |
%convolution_62 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_64, %_param_constant646, %_param_constant647, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%silu_65 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%addmm_1,), kwargs = {}) | |
%_param_constant648 : [#users=1] = get_attr[target=_param_constant648] | |
%t_203 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant648,), kwargs = {}) | |
%_param_constant649 : [#users=1] = get_attr[target=_param_constant649] | |
%addmm_98 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant649, %silu_65, %t_203), kwargs = {}) | |
%slice_79 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%addmm_98, 0, 0, 9223372036854775807), kwargs = {}) | |
%slice_80 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%slice_79, 1, 0, 9223372036854775807), kwargs = {}) | |
%unsqueeze_392 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%slice_80, 2), kwargs = {}) | |
%unsqueeze_393 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_392, 3), kwargs = {}) | |
%add_323 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_62, %unsqueeze_393), kwargs = {}) | |
%view_536 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_323, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_103 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_536, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_206 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_103, 0), kwargs = {}) | |
%getitem_207 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_103, 1), kwargs = {}) | |
%add_324 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_206, 1e-05), kwargs = {}) | |
%rsqrt_103 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_324,), kwargs = {}) | |
%sub_103 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_536, %getitem_207), kwargs = {}) | |
%mul_224 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_103, %rsqrt_103), kwargs = {}) | |
%view_537 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_224, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant650 : [#users=1] = get_attr[target=_param_constant650] | |
%unsqueeze_394 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant650, 0), kwargs = {}) | |
%unsqueeze_395 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_394, 2), kwargs = {}) | |
%unsqueeze_396 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_395, 3), kwargs = {}) | |
%_param_constant651 : [#users=1] = get_attr[target=_param_constant651] | |
%unsqueeze_397 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant651, 0), kwargs = {}) | |
%unsqueeze_398 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_397, 2), kwargs = {}) | |
%unsqueeze_399 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_398, 3), kwargs = {}) | |
%mul_225 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_537, %unsqueeze_399), kwargs = {}) | |
%add_325 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_225, %unsqueeze_396), kwargs = {}) | |
%squeeze_232 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_207, 3), kwargs = {}) | |
%squeeze_233 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_232, 2), kwargs = {}) | |
%squeeze_234 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_103, 3), kwargs = {}) | |
%squeeze_235 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_234, 2), kwargs = {}) | |
%detach_146 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_233,), kwargs = {}) | |
%detach_147 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_235,), kwargs = {}) | |
%silu_66 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_325,), kwargs = {}) | |
%_param_constant652 : [#users=1] = get_attr[target=_param_constant652] | |
%_param_constant653 : [#users=1] = get_attr[target=_param_constant653] | |
%convolution_63 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%silu_66, %_param_constant652, %_param_constant653, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%_param_constant654 : [#users=1] = get_attr[target=_param_constant654] | |
%_param_constant655 : [#users=1] = get_attr[target=_param_constant655] | |
%convolution_64 : [#users=1] = call_function[target=torch.ops.aten.convolution](args = (%cat_14, %_param_constant654, %_param_constant655, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%add_326 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%convolution_64, %convolution_63), kwargs = {}) | |
%div_22 : [#users=2] = call_function[target=torch.ops.aten.div](args = (%add_326, 1.0), kwargs = {}) | |
%view_538 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%div_22, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_104 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_538, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_208 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_104, 0), kwargs = {}) | |
%getitem_209 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_104, 1), kwargs = {}) | |
%add_327 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_208, 1e-06), kwargs = {}) | |
%rsqrt_104 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_327,), kwargs = {}) | |
%sub_104 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_538, %getitem_209), kwargs = {}) | |
%mul_226 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_104, %rsqrt_104), kwargs = {}) | |
%view_539 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_226, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant656 : [#users=1] = get_attr[target=_param_constant656] | |
%unsqueeze_400 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant656, 0), kwargs = {}) | |
%unsqueeze_401 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_400, 2), kwargs = {}) | |
%unsqueeze_402 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_401, 3), kwargs = {}) | |
%_param_constant657 : [#users=1] = get_attr[target=_param_constant657] | |
%unsqueeze_403 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant657, 0), kwargs = {}) | |
%unsqueeze_404 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_403, 2), kwargs = {}) | |
%unsqueeze_405 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_404, 3), kwargs = {}) | |
%mul_227 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_539, %unsqueeze_405), kwargs = {}) | |
%add_328 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_227, %unsqueeze_402), kwargs = {}) | |
%squeeze_236 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_209, 3), kwargs = {}) | |
%squeeze_237 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_236, 2), kwargs = {}) | |
%squeeze_238 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_104, 3), kwargs = {}) | |
%squeeze_239 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_238, 2), kwargs = {}) | |
%detach_148 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_237,), kwargs = {}) | |
%detach_149 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_239,), kwargs = {}) | |
%permute_150 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%add_328, [0, 2, 3, 1]), kwargs = {}) | |
%view_540 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%permute_150, [2, 4096, 320]), kwargs = {}) | |
%_param_constant658 : [#users=1] = get_attr[target=_param_constant658] | |
%t_204 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant658,), kwargs = {}) | |
%expand_31 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%view_540, [2, 4096, 320]), kwargs = {}) | |
%view_541 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_31, [2, 4096, 320]), kwargs = {}) | |
%expand_32 : [#users=1] = call_function[target=torch.ops.aten.expand](args = (%t_204, [2, 320, 320]), kwargs = {}) | |
%view_542 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%expand_32, [2, 320, 320]), kwargs = {}) | |
%bmm_45 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%view_541, %view_542), kwargs = {}) | |
%_unsafe_view_225 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%bmm_45, [2, 4096, 320]), kwargs = {}) | |
%_param_constant659 : [#users=1] = get_attr[target=_param_constant659] | |
%add_329 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%_unsafe_view_225, %_param_constant659), kwargs = {}) | |
%var_mean_105 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_329, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_210 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_105, 0), kwargs = {}) | |
%getitem_211 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_105, 1), kwargs = {}) | |
%add_330 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_210, 1e-05), kwargs = {}) | |
%rsqrt_105 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_330,), kwargs = {}) | |
%sub_105 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_329, %getitem_211), kwargs = {}) | |
%mul_228 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_105, %rsqrt_105), kwargs = {}) | |
%_param_constant660 : [#users=1] = get_attr[target=_param_constant660] | |
%mul_229 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_228, %_param_constant660), kwargs = {}) | |
%_param_constant661 : [#users=1] = get_attr[target=_param_constant661] | |
%add_331 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%mul_229, %_param_constant661), kwargs = {}) | |
%_param_constant662 : [#users=1] = get_attr[target=_param_constant662] | |
%t_205 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant662,), kwargs = {}) | |
%view_543 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_331, [8192, 320]), kwargs = {}) | |
%mm_90 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_543, %t_205), kwargs = {}) | |
%_unsafe_view_226 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_90, [2, 4096, 320]), kwargs = {}) | |
%view_544 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_226, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_151 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_544, [0, 2, 1, 3]), kwargs = {}) | |
%clone_135 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_151,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_227 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_135, [10, 4096, 64]), kwargs = {}) | |
%_param_constant663 : [#users=1] = get_attr[target=_param_constant663] | |
%t_206 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant663,), kwargs = {}) | |
%view_545 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_331, [8192, 320]), kwargs = {}) | |
%mm_91 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_545, %t_206), kwargs = {}) | |
%_unsafe_view_228 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_91, [2, 4096, 320]), kwargs = {}) | |
%_param_constant664 : [#users=1] = get_attr[target=_param_constant664] | |
%t_207 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant664,), kwargs = {}) | |
%view_546 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_331, [8192, 320]), kwargs = {}) | |
%mm_92 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_546, %t_207), kwargs = {}) | |
%_unsafe_view_229 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_92, [2, 4096, 320]), kwargs = {}) | |
%view_547 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_228, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_152 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_547, [0, 2, 1, 3]), kwargs = {}) | |
%clone_136 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_152,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_230 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_136, [10, 4096, 64]), kwargs = {}) | |
%view_548 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_229, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_153 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_548, [0, 2, 1, 3]), kwargs = {}) | |
%clone_137 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_153,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_231 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_137, [10, 4096, 64]), kwargs = {}) | |
%empty_30 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([10, 4096, 4096],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_30 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_230, -1, -2), kwargs = {}) | |
%baddbmm_30 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_30, %_unsafe_view_227, %transpose_30), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_30 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_30, -1, False), kwargs = {}) | |
%detach_150 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_30,), kwargs = {}) | |
%bmm_46 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_30, %_unsafe_view_231), kwargs = {}) | |
%view_549 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_46, [2, 5, 4096, 64]), kwargs = {}) | |
%permute_154 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_549, [0, 2, 1, 3]), kwargs = {}) | |
%clone_138 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_154,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_232 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_138, [2, 4096, 320]), kwargs = {}) | |
%_param_constant665 : [#users=1] = get_attr[target=_param_constant665] | |
%t_208 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant665,), kwargs = {}) | |
%view_550 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_232, [8192, 320]), kwargs = {}) | |
%_param_constant666 : [#users=1] = get_attr[target=_param_constant666] | |
%addmm_99 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant666, %view_550, %t_208), kwargs = {}) | |
%view_551 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_99, [2, 4096, 320]), kwargs = {}) | |
%add_332 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_551, %add_329), kwargs = {}) | |
%var_mean_106 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_332, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_212 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_106, 0), kwargs = {}) | |
%getitem_213 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_106, 1), kwargs = {}) | |
%add_333 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_212, 1e-05), kwargs = {}) | |
%rsqrt_106 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_333,), kwargs = {}) | |
%sub_106 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_332, %getitem_213), kwargs = {}) | |
%mul_230 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_106, %rsqrt_106), kwargs = {}) | |
%_param_constant667 : [#users=1] = get_attr[target=_param_constant667] | |
%mul_231 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_230, %_param_constant667), kwargs = {}) | |
%_param_constant668 : [#users=1] = get_attr[target=_param_constant668] | |
%add_334 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_231, %_param_constant668), kwargs = {}) | |
%_param_constant669 : [#users=1] = get_attr[target=_param_constant669] | |
%t_209 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant669,), kwargs = {}) | |
%view_552 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_334, [8192, 320]), kwargs = {}) | |
%mm_93 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_552, %t_209), kwargs = {}) | |
%_unsafe_view_233 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_93, [2, 4096, 320]), kwargs = {}) | |
%view_553 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_233, [2, 4096, 5, 64]), kwargs = {}) | |
%permute_155 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_553, [0, 2, 1, 3]), kwargs = {}) | |
%clone_139 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_155,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_234 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_139, [10, 4096, 64]), kwargs = {}) | |
%_param_constant670 : [#users=1] = get_attr[target=_param_constant670] | |
%t_210 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant670,), kwargs = {}) | |
%view_554 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_94 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_554, %t_210), kwargs = {}) | |
%_unsafe_view_235 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_94, [2, 64, 320]), kwargs = {}) | |
%_param_constant671 : [#users=1] = get_attr[target=_param_constant671] | |
%t_211 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant671,), kwargs = {}) | |
%view_555 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%arg2_1, [128, 1024]), kwargs = {}) | |
%mm_95 : [#users=1] = call_function[target=torch.ops.aten.mm](args = (%view_555, %t_211), kwargs = {}) | |
%_unsafe_view_236 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%mm_95, [2, 64, 320]), kwargs = {}) | |
%view_556 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_235, [2, 64, 5, 64]), kwargs = {}) | |
%permute_156 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_556, [0, 2, 1, 3]), kwargs = {}) | |
%clone_140 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_156,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_237 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_140, [10, 64, 64]), kwargs = {}) | |
%view_557 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_236, [2, 64, 5, 64]), kwargs = {}) | |
%permute_157 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_557, [0, 2, 1, 3]), kwargs = {}) | |
%clone_141 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_157,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_238 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_141, [10, 64, 64]), kwargs = {}) | |
%empty_31 : [#users=1] = call_function[target=torch.ops.aten.empty](args = ([10, 4096, 64],), kwargs = {dtype: torch.float16, device: cpu, pin_memory: False}) | |
%transpose_31 : [#users=1] = call_function[target=torch.ops.aten.transpose](args = (%_unsafe_view_237, -1, -2), kwargs = {}) | |
%baddbmm_31 : [#users=1] = call_function[target=torch.ops.aten.baddbmm](args = (%empty_31, %_unsafe_view_234, %transpose_31), kwargs = {beta: 0, alpha: 0.125}) | |
%_softmax_31 : [#users=2] = call_function[target=torch.ops.aten._softmax](args = (%baddbmm_31, -1, False), kwargs = {}) | |
%detach_151 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%_softmax_31,), kwargs = {}) | |
%bmm_47 : [#users=1] = call_function[target=torch.ops.aten.bmm](args = (%_softmax_31, %_unsafe_view_238), kwargs = {}) | |
%view_558 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%bmm_47, [2, 5, 4096, 64]), kwargs = {}) | |
%permute_158 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_558, [0, 2, 1, 3]), kwargs = {}) | |
%clone_142 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_158,), kwargs = {memory_format: torch.contiguous_format}) | |
%_unsafe_view_239 : [#users=1] = call_function[target=torch.ops.aten._unsafe_view](args = (%clone_142, [2, 4096, 320]), kwargs = {}) | |
%_param_constant672 : [#users=1] = get_attr[target=_param_constant672] | |
%t_212 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant672,), kwargs = {}) | |
%view_559 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%_unsafe_view_239, [8192, 320]), kwargs = {}) | |
%_param_constant673 : [#users=1] = get_attr[target=_param_constant673] | |
%addmm_100 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant673, %view_559, %t_212), kwargs = {}) | |
%view_560 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_100, [2, 4096, 320]), kwargs = {}) | |
%add_335 : [#users=3] = call_function[target=torch.ops.aten.add](args = (%view_560, %add_332), kwargs = {}) | |
%var_mean_107 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%add_335, [2]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_214 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_107, 0), kwargs = {}) | |
%getitem_215 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_107, 1), kwargs = {}) | |
%add_336 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_214, 1e-05), kwargs = {}) | |
%rsqrt_107 : [#users=1] = call_function[target=torch.ops.aten.rsqrt](args = (%add_336,), kwargs = {}) | |
%sub_107 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%add_335, %getitem_215), kwargs = {}) | |
%mul_232 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_107, %rsqrt_107), kwargs = {}) | |
%_param_constant674 : [#users=1] = get_attr[target=_param_constant674] | |
%mul_233 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%mul_232, %_param_constant674), kwargs = {}) | |
%_param_constant675 : [#users=1] = get_attr[target=_param_constant675] | |
%add_337 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_233, %_param_constant675), kwargs = {}) | |
%_param_constant676 : [#users=1] = get_attr[target=_param_constant676] | |
%t_213 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant676,), kwargs = {}) | |
%view_561 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_337, [8192, 320]), kwargs = {}) | |
%_param_constant677 : [#users=1] = get_attr[target=_param_constant677] | |
%addmm_101 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant677, %view_561, %t_213), kwargs = {}) | |
%view_562 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%addmm_101, [2, 4096, 2560]), kwargs = {}) | |
%slice_81 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_562, -1, 0, 1280), kwargs = {}) | |
%slice_82 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%view_562, -1, 1280, 2560), kwargs = {}) | |
%gelu_15 : [#users=1] = call_function[target=torch.ops.aten.gelu](args = (%slice_82,), kwargs = {}) | |
%mul_234 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%slice_81, %gelu_15), kwargs = {}) | |
%_param_constant678 : [#users=1] = get_attr[target=_param_constant678] | |
%t_214 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant678,), kwargs = {}) | |
%view_563 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_234, [8192, 1280]), kwargs = {}) | |
%_param_constant679 : [#users=1] = get_attr[target=_param_constant679] | |
%addmm_102 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant679, %view_563, %t_214), kwargs = {}) | |
%view_564 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_102, [2, 4096, 320]), kwargs = {}) | |
%add_338 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%view_564, %add_335), kwargs = {}) | |
%_param_constant680 : [#users=1] = get_attr[target=_param_constant680] | |
%t_215 : [#users=1] = call_function[target=torch.ops.aten.t](args = (%_param_constant680,), kwargs = {}) | |
%view_565 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%add_338, [8192, 320]), kwargs = {}) | |
%_param_constant681 : [#users=1] = get_attr[target=_param_constant681] | |
%addmm_103 : [#users=1] = call_function[target=torch.ops.aten.addmm](args = (%_param_constant681, %view_565, %t_215), kwargs = {}) | |
%view_566 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%addmm_103, [2, 4096, 320]), kwargs = {}) | |
%view_567 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%view_566, [2, 64, 64, 320]), kwargs = {}) | |
%permute_159 : [#users=1] = call_function[target=torch.ops.aten.permute](args = (%view_567, [0, 3, 1, 2]), kwargs = {}) | |
%clone_143 : [#users=1] = call_function[target=torch.ops.aten.clone](args = (%permute_159,), kwargs = {memory_format: torch.contiguous_format}) | |
%add_339 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%clone_143, %div_22), kwargs = {}) | |
%view_568 : [#users=2] = call_function[target=torch.ops.aten.view](args = (%add_339, [2, 32, 10, 4096]), kwargs = {}) | |
%var_mean_108 : [#users=2] = call_function[target=torch.ops.aten.var_mean](args = (%view_568, [2, 3]), kwargs = {correction: 0, keepdim: True}) | |
%getitem_216 : [#users=1] = call_function[target=operator.getitem](args = (%var_mean_108, 0), kwargs = {}) | |
%getitem_217 : [#users=2] = call_function[target=operator.getitem](args = (%var_mean_108, 1), kwargs = {}) | |
%add_340 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%getitem_216, 1e-05), kwargs = {}) | |
%rsqrt_108 : [#users=2] = call_function[target=torch.ops.aten.rsqrt](args = (%add_340,), kwargs = {}) | |
%sub_108 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%view_568, %getitem_217), kwargs = {}) | |
%mul_235 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%sub_108, %rsqrt_108), kwargs = {}) | |
%view_569 : [#users=1] = call_function[target=torch.ops.aten.view](args = (%mul_235, [2, 320, 64, 64]), kwargs = {}) | |
%_param_constant682 : [#users=1] = get_attr[target=_param_constant682] | |
%unsqueeze_406 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant682, 0), kwargs = {}) | |
%unsqueeze_407 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_406, 2), kwargs = {}) | |
%unsqueeze_408 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_407, 3), kwargs = {}) | |
%_param_constant683 : [#users=1] = get_attr[target=_param_constant683] | |
%unsqueeze_409 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%_param_constant683, 0), kwargs = {}) | |
%unsqueeze_410 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_409, 2), kwargs = {}) | |
%unsqueeze_411 : [#users=1] = call_function[target=torch.ops.aten.unsqueeze](args = (%unsqueeze_410, 3), kwargs = {}) | |
%mul_236 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%view_569, %unsqueeze_411), kwargs = {}) | |
%add_341 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%mul_236, %unsqueeze_408), kwargs = {}) | |
%squeeze_240 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%getitem_217, 3), kwargs = {}) | |
%squeeze_241 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_240, 2), kwargs = {}) | |
%squeeze_242 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%rsqrt_108, 3), kwargs = {}) | |
%squeeze_243 : [#users=1] = call_function[target=torch.ops.aten.squeeze](args = (%squeeze_242, 2), kwargs = {}) | |
%detach_152 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_241,), kwargs = {}) | |
%detach_153 : [#users=0] = call_function[target=torch.ops.aten.detach](args = (%squeeze_243,), kwargs = {}) | |
%silu_67 : [#users=1] = call_function[target=torch.ops.aten.silu](args = (%add_341,), kwargs = {}) | |
%_param_constant684 : [#users=1] = get_attr[target=_param_constant684] | |
%_param_constant685 : [#users=1] = get_attr[target=_param_constant685] | |
%convolution_65 : [#users=2] = call_function[target=torch.ops.aten.convolution](args = (%silu_67, %_param_constant684, %_param_constant685, [1, 1], [1, 1], [1, 1], False, [0, 0], 1), kwargs = {}) | |
%slice_83 : [#users=2] = call_function[target=torch.ops.aten.slice](args = (%convolution_65, 0, 0, 1), kwargs = {}) | |
%slice_84 : [#users=1] = call_function[target=torch.ops.aten.slice](args = (%convolution_65, 0, 1, 2), kwargs = {}) | |
%sub_109 : [#users=1] = call_function[target=torch.ops.aten.sub](args = (%slice_84, %slice_83), kwargs = {}) | |
%mul_237 : [#users=1] = call_function[target=torch.ops.aten.mul](args = (%arg3_1, %sub_109), kwargs = {}) | |
%add_342 : [#users=1] = call_function[target=torch.ops.aten.add](args = (%slice_83, %mul_237), kwargs = {}) | |
return add_342 |
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module attributes {torch.debug_module_name = "_lambda"} { | |
func.func @forward(%arg0: !torch.vtensor<[1,4,64,64],f16>, %arg1: !torch.vtensor<[1],f16>, %arg2: !torch.vtensor<[2,64,1024],f16>, %arg3: !torch.vtensor<[],f32>) -> !torch.vtensor<[1,4,64,64],f16> { | |
%none = torch.constant.none | |
%int7 = torch.constant.int 7 | |
%false = torch.constant.bool false | |
%true = torch.constant.bool true | |
%int40960 = torch.constant.int 40960 | |
%int10 = torch.constant.int 10 | |
%int4096 = torch.constant.int 4096 | |
%float1.000000e00 = torch.constant.float 1.000000e+00 | |
%int5 = torch.constant.int 5 | |
%int0 = torch.constant.int 0 | |
%int1 = torch.constant.int 1 | |
%int320 = torch.constant.int 320 | |
%int81920 = torch.constant.int 81920 | |
%int20 = torch.constant.int 20 | |
%int122880 = torch.constant.int 122880 | |
%int30 = torch.constant.int 30 | |
%int640 = torch.constant.int 640 | |
%int20480 = torch.constant.int 20480 | |
%int1024 = torch.constant.int 1024 | |
%int30720 = torch.constant.int 30720 | |
%int40 = torch.constant.int 40 | |
%int61440 = torch.constant.int 61440 | |
%int60 = torch.constant.int 60 | |
%int1280 = torch.constant.int 1280 | |
%int10240 = torch.constant.int 10240 | |
%int256 = torch.constant.int 256 | |
%int15360 = torch.constant.int 15360 | |
%int80 = torch.constant.int 80 | |
%int2560 = torch.constant.int 2560 | |
%int64 = torch.constant.int 64 | |
%int5120 = torch.constant.int 5120 | |
%int160 = torch.constant.int 160 | |
%0 = torch.vtensor.literal(dense<[-2.388000e-03, 3.170010e-03, 4.332070e-04, 1.795770e-03]> : tensor<4xf16>) : !torch.vtensor<[4],f16> | |
%1 = torch.vtensor.literal(dense_resource<__elided__> : tensor<4x320x3x3xf16>) : !torch.vtensor<[4,320,3,3],f16> | |
%2 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%3 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%4 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%5 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%6 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%7 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> | |
%8 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%9 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16> | |
%10 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%11 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%12 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%13 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%14 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf16>) : !torch.vtensor<[320,1024],f16> | |
%15 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf16>) : !torch.vtensor<[320,1024],f16> | |
%16 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%17 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%18 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%19 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%20 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%21 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%22 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%23 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%24 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%25 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%26 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%27 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%28 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%29 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%30 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%31 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xf16>) : !torch.vtensor<[320,640,1,1],f16> | |
%32 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%33 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16> | |
%34 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%35 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%36 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%37 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> | |
%38 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%39 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xf16>) : !torch.vtensor<[320,640,3,3],f16> | |
%40 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%41 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%42 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%43 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%44 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%45 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> | |
%46 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%47 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16> | |
%48 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%49 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%50 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%51 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%52 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf16>) : !torch.vtensor<[320,1024],f16> | |
%53 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf16>) : !torch.vtensor<[320,1024],f16> | |
%54 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%55 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%56 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%57 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%58 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%59 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%60 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%61 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%62 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%63 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%64 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%65 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%66 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%67 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%68 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%69 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x1x1xf16>) : !torch.vtensor<[320,640,1,1],f16> | |
%70 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%71 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16> | |
%72 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%73 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%74 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%75 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> | |
%76 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%77 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x640x3x3xf16>) : !torch.vtensor<[320,640,3,3],f16> | |
%78 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%79 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%80 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%81 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%82 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%83 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> | |
%84 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%85 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16> | |
%86 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%87 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%88 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%89 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%90 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf16>) : !torch.vtensor<[320,1024],f16> | |
%91 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf16>) : !torch.vtensor<[320,1024],f16> | |
%92 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%93 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%94 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%95 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%96 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%97 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%98 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%99 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%100 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%101 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%102 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%103 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%104 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%105 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%106 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%107 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x1x1xf16>) : !torch.vtensor<[320,960,1,1],f16> | |
%108 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%109 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16> | |
%110 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%111 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%112 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%113 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> | |
%114 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%115 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x960x3x3xf16>) : !torch.vtensor<[320,960,3,3],f16> | |
%116 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> | |
%117 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> | |
%118 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%119 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16> | |
%120 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%121 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%122 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%123 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16> | |
%124 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> | |
%125 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16> | |
%126 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%127 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%128 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%129 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%130 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf16>) : !torch.vtensor<[640,1024],f16> | |
%131 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf16>) : !torch.vtensor<[640,1024],f16> | |
%132 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%133 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%134 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%135 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%136 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%137 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%138 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%139 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%140 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%141 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%142 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%143 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%144 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%145 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%146 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%147 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x1x1xf16>) : !torch.vtensor<[640,960,1,1],f16> | |
%148 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%149 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16> | |
%150 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%151 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%152 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%153 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> | |
%154 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%155 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x960x3x3xf16>) : !torch.vtensor<[640,960,3,3],f16> | |
%156 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> | |
%157 = torch.vtensor.literal(dense_resource<__elided__> : tensor<960xf16>) : !torch.vtensor<[960],f16> | |
%158 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%159 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%160 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%161 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16> | |
%162 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> | |
%163 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16> | |
%164 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%165 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%166 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%167 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%168 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf16>) : !torch.vtensor<[640,1024],f16> | |
%169 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf16>) : !torch.vtensor<[640,1024],f16> | |
%170 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%171 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%172 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%173 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%174 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%175 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%176 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%177 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%178 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%179 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%180 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%181 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%182 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%183 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%184 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%185 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x1x1xf16>) : !torch.vtensor<[640,1280,1,1],f16> | |
%186 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%187 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16> | |
%188 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%189 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%190 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%191 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> | |
%192 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%193 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280x3x3xf16>) : !torch.vtensor<[640,1280,3,3],f16> | |
%194 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%195 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%196 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%197 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%198 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%199 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16> | |
%200 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> | |
%201 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16> | |
%202 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%203 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%204 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%205 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%206 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf16>) : !torch.vtensor<[640,1024],f16> | |
%207 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf16>) : !torch.vtensor<[640,1024],f16> | |
%208 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%209 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%210 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%211 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%212 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%213 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%214 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%215 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%216 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%217 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%218 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%219 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%220 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%221 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%222 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%223 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x1x1xf16>) : !torch.vtensor<[640,1920,1,1],f16> | |
%224 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%225 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16> | |
%226 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%227 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%228 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%229 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> | |
%230 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%231 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1920x3x3xf16>) : !torch.vtensor<[640,1920,3,3],f16> | |
%232 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> | |
%233 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> | |
%234 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%235 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%236 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%237 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%238 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%239 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16> | |
%240 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> | |
%241 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16> | |
%242 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%243 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%244 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%245 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%246 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%247 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%248 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%249 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%250 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%251 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%252 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%253 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%254 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%255 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%256 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%257 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%258 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%259 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%260 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%261 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%262 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%263 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x1x1xf16>) : !torch.vtensor<[1280,1920,1,1],f16> | |
%264 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%265 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%266 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%267 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%268 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%269 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%270 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%271 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1920x3x3xf16>) : !torch.vtensor<[1280,1920,3,3],f16> | |
%272 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> | |
%273 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1920xf16>) : !torch.vtensor<[1920],f16> | |
%274 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%275 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%276 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%277 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16> | |
%278 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> | |
%279 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16> | |
%280 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%281 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%282 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%283 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%284 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%285 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%286 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%287 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%288 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%289 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%290 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%291 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%292 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%293 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%294 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%295 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%296 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%297 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%298 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%299 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%300 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%301 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16> | |
%302 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%303 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%304 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%305 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%306 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%307 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%308 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%309 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16> | |
%310 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%311 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%312 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%313 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%314 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%315 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16> | |
%316 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> | |
%317 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16> | |
%318 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%319 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%320 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%321 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%322 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%323 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%324 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%325 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%326 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%327 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%328 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%329 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%330 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%331 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%332 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%333 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%334 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%335 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%336 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%337 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%338 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%339 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16> | |
%340 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%341 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%342 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%343 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%344 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%345 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%346 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%347 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16> | |
%348 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%349 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%350 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%351 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%352 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%353 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16> | |
%354 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%355 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%356 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%357 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%358 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%359 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%360 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%361 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16> | |
%362 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%363 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%364 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%365 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16> | |
%366 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%367 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%368 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%369 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%370 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%371 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%372 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%373 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16> | |
%374 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%375 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%376 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%377 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x1x1xf16>) : !torch.vtensor<[1280,2560,1,1],f16> | |
%378 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%379 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%380 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%381 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%382 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%383 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%384 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%385 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x2560x3x3xf16>) : !torch.vtensor<[1280,2560,3,3],f16> | |
%386 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%387 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%388 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%389 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%390 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%391 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%392 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%393 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%394 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%395 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%396 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%397 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%398 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%399 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%400 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%401 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16> | |
%402 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> | |
%403 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16> | |
%404 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%405 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%406 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%407 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%408 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%409 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%410 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%411 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%412 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%413 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%414 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%415 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%416 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%417 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%418 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%419 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%420 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%421 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%422 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%423 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%424 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%425 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%426 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%427 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%428 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%429 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%430 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%431 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%432 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%433 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%434 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%435 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%436 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%437 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%438 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%439 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%440 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%441 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%442 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%443 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%444 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%445 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%446 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%447 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%448 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%449 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%450 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%451 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%452 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%453 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%454 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%455 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%456 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%457 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%458 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%459 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16> | |
%460 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> | |
%461 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16> | |
%462 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%463 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%464 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%465 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%466 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%467 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%468 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%469 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%470 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%471 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%472 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%473 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%474 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%475 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%476 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%477 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%478 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%479 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%480 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%481 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%482 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%483 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%484 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%485 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%486 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%487 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%488 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%489 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%490 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%491 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%492 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%493 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%494 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%495 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x5120xf16>) : !torch.vtensor<[1280,5120],f16> | |
%496 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240xf16>) : !torch.vtensor<[10240],f16> | |
%497 = torch.vtensor.literal(dense_resource<__elided__> : tensor<10240x1280xf16>) : !torch.vtensor<[10240,1280],f16> | |
%498 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%499 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%500 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%501 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%502 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%503 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1024xf16>) : !torch.vtensor<[1280,1024],f16> | |
%504 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%505 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%506 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%507 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%508 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%509 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%510 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%511 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%512 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%513 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%514 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%515 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%516 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%517 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%518 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%519 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x1x1xf16>) : !torch.vtensor<[1280,640,1,1],f16> | |
%520 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%521 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280x3x3xf16>) : !torch.vtensor<[1280,1280,3,3],f16> | |
%522 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%523 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%524 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%525 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%526 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%527 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x640x3x3xf16>) : !torch.vtensor<[1280,640,3,3],f16> | |
%528 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%529 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%530 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%531 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16> | |
%532 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%533 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%534 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%535 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16> | |
%536 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> | |
%537 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16> | |
%538 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%539 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%540 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%541 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%542 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf16>) : !torch.vtensor<[640,1024],f16> | |
%543 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf16>) : !torch.vtensor<[640,1024],f16> | |
%544 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%545 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%546 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%547 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%548 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%549 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%550 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%551 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%552 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%553 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%554 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%555 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%556 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%557 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%558 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%559 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16> | |
%560 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%561 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%562 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%563 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> | |
%564 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%565 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16> | |
%566 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%567 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%568 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%569 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%570 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%571 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x2560xf16>) : !torch.vtensor<[640,2560],f16> | |
%572 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120xf16>) : !torch.vtensor<[5120],f16> | |
%573 = torch.vtensor.literal(dense_resource<__elided__> : tensor<5120x640xf16>) : !torch.vtensor<[5120,640],f16> | |
%574 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%575 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%576 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%577 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%578 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf16>) : !torch.vtensor<[640,1024],f16> | |
%579 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1024xf16>) : !torch.vtensor<[640,1024],f16> | |
%580 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%581 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%582 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%583 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%584 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%585 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%586 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%587 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%588 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%589 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%590 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%591 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640xf16>) : !torch.vtensor<[640,640],f16> | |
%592 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%593 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%594 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%595 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x1x1xf16>) : !torch.vtensor<[640,320,1,1],f16> | |
%596 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%597 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x640x3x3xf16>) : !torch.vtensor<[640,640,3,3],f16> | |
%598 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%599 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%600 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%601 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x1280xf16>) : !torch.vtensor<[640,1280],f16> | |
%602 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640xf16>) : !torch.vtensor<[640],f16> | |
%603 = torch.vtensor.literal(dense_resource<__elided__> : tensor<640x320x3x3xf16>) : !torch.vtensor<[640,320,3,3],f16> | |
%604 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%605 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%606 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%607 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16> | |
%608 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%609 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%610 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%611 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> | |
%612 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%613 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16> | |
%614 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%615 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%616 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%617 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%618 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf16>) : !torch.vtensor<[320,1024],f16> | |
%619 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf16>) : !torch.vtensor<[320,1024],f16> | |
%620 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%621 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%622 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%623 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%624 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%625 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%626 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%627 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%628 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%629 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%630 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%631 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%632 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%633 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%634 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%635 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16> | |
%636 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%637 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%638 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%639 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> | |
%640 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%641 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16> | |
%642 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%643 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%644 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%645 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%646 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%647 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> | |
%648 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560xf16>) : !torch.vtensor<[2560],f16> | |
%649 = torch.vtensor.literal(dense_resource<__elided__> : tensor<2560x320xf16>) : !torch.vtensor<[2560,320],f16> | |
%650 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%651 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%652 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%653 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%654 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf16>) : !torch.vtensor<[320,1024],f16> | |
%655 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1024xf16>) : !torch.vtensor<[320,1024],f16> | |
%656 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%657 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%658 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%659 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%660 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%661 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%662 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%663 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%664 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%665 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%666 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%667 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320xf16>) : !torch.vtensor<[320,320],f16> | |
%668 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%669 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%670 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%671 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16> | |
%672 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%673 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%674 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%675 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x1280xf16>) : !torch.vtensor<[320,1280],f16> | |
%676 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%677 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x320x3x3xf16>) : !torch.vtensor<[320,320,3,3],f16> | |
%678 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%679 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%680 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320xf16>) : !torch.vtensor<[320],f16> | |
%681 = torch.vtensor.literal(dense_resource<__elided__> : tensor<320x4x3x3xf16>) : !torch.vtensor<[320,4,3,3],f16> | |
%682 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%683 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x1280xf16>) : !torch.vtensor<[1280,1280],f16> | |
%684 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280xf16>) : !torch.vtensor<[1280],f16> | |
%685 = torch.vtensor.literal(dense_resource<__elided__> : tensor<1280x320xf16>) : !torch.vtensor<[1280,320],f16> | |
%float-9.210340e00 = torch.constant.float -9.2103403719761836 | |
%int-1 = torch.constant.int -1 | |
%int6 = torch.constant.int 6 | |
%int-2 = torch.constant.int -2 | |
%str = torch.constant.str "none" | |
%int2 = torch.constant.int 2 | |
%int9223372036854775807 = torch.constant.int 9223372036854775807 | |
%int32 = torch.constant.int 32 | |
%int3 = torch.constant.int 3 | |
%float1.000000e-05 = torch.constant.float 1.000000e-05 | |
%float9.999990e-07 = torch.constant.float 9.9999999999999995E-7 | |
%int8192 = torch.constant.int 8192 | |
%float1.250000e-01 = torch.constant.float 1.250000e-01 | |
%int128 = torch.constant.int 128 | |
%int2048 = torch.constant.int 2048 | |
%int16 = torch.constant.int 16 | |
%int512 = torch.constant.int 512 | |
%int8 = torch.constant.int 8 | |
%float2.000000e00 = torch.constant.float 2.000000e+00 | |
%int1920 = torch.constant.int 1920 | |
%int960 = torch.constant.int 960 | |
%cpu = torch.constant.device "cpu" | |
%686 = torch.prim.ListConstruct %arg0, %arg0 : (!torch.vtensor<[1,4,64,64],f16>, !torch.vtensor<[1,4,64,64],f16>) -> !torch.list<vtensor> | |
%687 = torch.aten.cat %686, %int0 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,4,64,64],f16> | |
%688 = torch.prim.ListConstruct %int2 : (!torch.int) -> !torch.list<int> | |
%689 = torch.aten.broadcast_to %arg1, %688 : !torch.vtensor<[1],f16>, !torch.list<int> -> !torch.vtensor<[2],f16> | |
%690 = torch.aten.arange.start_step %int0, %int160, %int1, %int5, %none, %cpu, %false : !torch.int, !torch.int, !torch.int, !torch.int, !torch.none, !torch.Device, !torch.bool -> !torch.vtensor<[160],f16> | |
%691 = torch.aten.mul.Scalar %690, %float-9.210340e00 : !torch.vtensor<[160],f16>, !torch.float -> !torch.vtensor<[160],f16> | |
%692 = torch.aten.div.Scalar %691, %int160 : !torch.vtensor<[160],f16>, !torch.int -> !torch.vtensor<[160],f16> | |
%693 = torch.aten.exp %692 : !torch.vtensor<[160],f16> -> !torch.vtensor<[160],f16> | |
%694 = torch.aten.unsqueeze %689, %int1 : !torch.vtensor<[2],f16>, !torch.int -> !torch.vtensor<[2,1],f16> | |
%695 = torch.aten.unsqueeze %693, %int0 : !torch.vtensor<[160],f16>, !torch.int -> !torch.vtensor<[1,160],f16> | |
%696 = torch.aten.mul.Tensor %694, %695 : !torch.vtensor<[2,1],f16>, !torch.vtensor<[1,160],f16> -> !torch.vtensor<[2,160],f16> | |
%697 = torch.aten.mul.Scalar %696, %int1 : !torch.vtensor<[2,160],f16>, !torch.int -> !torch.vtensor<[2,160],f16> | |
%698 = torch.aten.sin %697 : !torch.vtensor<[2,160],f16> -> !torch.vtensor<[2,160],f16> | |
%699 = torch.aten.cos %697 : !torch.vtensor<[2,160],f16> -> !torch.vtensor<[2,160],f16> | |
%700 = torch.prim.ListConstruct %698, %699 : (!torch.vtensor<[2,160],f16>, !torch.vtensor<[2,160],f16>) -> !torch.list<vtensor> | |
%701 = torch.aten.cat %700, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f16> | |
%702 = torch.aten.slice.Tensor %701, %int1, %int160, %int9223372036854775807, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f16> | |
%703 = torch.aten.slice.Tensor %701, %int1, %int0, %int160, %int1 : !torch.vtensor<[2,320],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,160],f16> | |
%704 = torch.prim.ListConstruct %702, %703 : (!torch.vtensor<[2,160],f16>, !torch.vtensor<[2,160],f16>) -> !torch.list<vtensor> | |
%705 = torch.aten.cat %704, %int-1 : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[2,320],f16> | |
%706 = torch.aten.transpose.int %685, %int0, %int1 : !torch.vtensor<[1280,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,1280],f16> | |
%707 = torch.aten.mm %705, %706 : !torch.vtensor<[2,320],f16>, !torch.vtensor<[320,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%708 = torch.aten.mul.Scalar %684, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%709 = torch.aten.add.Tensor %708, %707, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> | |
%710 = torch.aten.sigmoid %709 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%711 = torch.aten.mul.Tensor %710, %709 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%712 = torch.aten.transpose.int %683, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%713 = torch.aten.mm %711, %712 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%714 = torch.aten.mul.Scalar %682, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%715 = torch.aten.add.Tensor %714, %713, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> | |
%716 = torch.prim.ListConstruct %int1, %int1 : (!torch.int, !torch.int) -> !torch.list<int> | |
%717 = torch.prim.ListConstruct %int0, %int0 : (!torch.int, !torch.int) -> !torch.list<int> | |
%718 = torch.aten.convolution %687, %681, %680, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,4,64,64],f16>, !torch.vtensor<[320,4,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%719 = torch.prim.ListConstruct %int2, %int32, %int10, %int4096 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%720 = torch.aten.view %718, %719 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> | |
%721 = torch.prim.ListConstruct %int2, %int3 : (!torch.int, !torch.int) -> !torch.list<int> | |
%722 = torch.aten.to.dtype %720, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> | |
%723 = torch.aten.sum.dim_IntList %722, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%724 = torch.aten.div.Scalar %723, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%725 = torch.aten.sub.Tensor %722, %724, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> | |
%726 = torch.aten.mul.Tensor %725, %725 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> | |
%727 = torch.aten.sum.dim_IntList %726, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%728 = torch.aten.div.Scalar %727, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%729 = torch.aten.to.dtype %728, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%730 = torch.aten.sum.dim_IntList %720, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%731 = torch.aten.div.Scalar %730, %int40960 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%732 = torch.aten.add.Scalar %729, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%733 = torch.aten.rsqrt %732 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%734 = torch.aten.sub.Tensor %720, %731, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,10,4096],f16> | |
%735 = torch.aten.mul.Tensor %734, %733 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,10,4096],f16> | |
%736 = torch.prim.ListConstruct %int2, %int320, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%737 = torch.aten.view %735, %736 : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> | |
%738 = torch.aten.unsqueeze %679, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%739 = torch.aten.unsqueeze %738, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%740 = torch.aten.unsqueeze %739, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%741 = torch.aten.unsqueeze %678, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%742 = torch.aten.unsqueeze %741, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%743 = torch.aten.unsqueeze %742, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%744 = torch.aten.mul.Tensor %737, %743 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%745 = torch.aten.add.Tensor %744, %740, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%746 = torch.aten.sigmoid %745 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%747 = torch.aten.mul.Tensor %746, %745 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%748 = torch.aten.convolution %747, %677, %676, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%749 = torch.aten.sigmoid %715 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%750 = torch.aten.mul.Tensor %749, %715 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%751 = torch.aten.transpose.int %675, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> | |
%752 = torch.aten.mm %750, %751 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16> | |
%753 = torch.aten.mul.Scalar %674, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> | |
%754 = torch.aten.add.Tensor %753, %752, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16> | |
%755 = torch.aten.unsqueeze %754, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16> | |
%756 = torch.aten.unsqueeze %755, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16> | |
%757 = torch.aten.add.Tensor %748, %756, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%758 = torch.aten.view %757, %719 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> | |
%759 = torch.aten.to.dtype %758, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> | |
%760 = torch.aten.sum.dim_IntList %759, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%761 = torch.aten.div.Scalar %760, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%762 = torch.aten.sub.Tensor %759, %761, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> | |
%763 = torch.aten.mul.Tensor %762, %762 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> | |
%764 = torch.aten.sum.dim_IntList %763, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%765 = torch.aten.div.Scalar %764, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%766 = torch.aten.to.dtype %765, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%767 = torch.aten.sum.dim_IntList %758, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%768 = torch.aten.div.Scalar %767, %int40960 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%769 = torch.aten.add.Scalar %766, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%770 = torch.aten.rsqrt %769 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%771 = torch.aten.sub.Tensor %758, %768, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,10,4096],f16> | |
%772 = torch.aten.mul.Tensor %771, %770 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,10,4096],f16> | |
%773 = torch.aten.view %772, %736 : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> | |
%774 = torch.aten.unsqueeze %673, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%775 = torch.aten.unsqueeze %774, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%776 = torch.aten.unsqueeze %775, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%777 = torch.aten.unsqueeze %672, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%778 = torch.aten.unsqueeze %777, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%779 = torch.aten.unsqueeze %778, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%780 = torch.aten.mul.Tensor %773, %779 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%781 = torch.aten.add.Tensor %780, %776, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%782 = torch.aten.sigmoid %781 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%783 = torch.aten.mul.Tensor %782, %781 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%784 = torch.aten.convolution %783, %671, %670, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%785 = torch.aten.add.Tensor %718, %784, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%786 = torch.aten.div.Scalar %785, %float1.000000e00 : !torch.vtensor<[2,320,64,64],f16>, !torch.float -> !torch.vtensor<[2,320,64,64],f16> | |
%787 = torch.aten.view %786, %719 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> | |
%788 = torch.aten.to.dtype %787, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> | |
%789 = torch.aten.sum.dim_IntList %788, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%790 = torch.aten.div.Scalar %789, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%791 = torch.aten.sub.Tensor %788, %790, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> | |
%792 = torch.aten.mul.Tensor %791, %791 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> | |
%793 = torch.aten.sum.dim_IntList %792, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%794 = torch.aten.div.Scalar %793, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%795 = torch.aten.to.dtype %794, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%796 = torch.aten.sum.dim_IntList %787, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%797 = torch.aten.div.Scalar %796, %int40960 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%798 = torch.aten.add.Scalar %795, %float9.999990e-07, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%799 = torch.aten.rsqrt %798 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%800 = torch.aten.sub.Tensor %787, %797, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,10,4096],f16> | |
%801 = torch.aten.mul.Tensor %800, %799 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,10,4096],f16> | |
%802 = torch.aten.view %801, %736 : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> | |
%803 = torch.aten.unsqueeze %669, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%804 = torch.aten.unsqueeze %803, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%805 = torch.aten.unsqueeze %804, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%806 = torch.aten.unsqueeze %668, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%807 = torch.aten.unsqueeze %806, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%808 = torch.aten.unsqueeze %807, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%809 = torch.aten.mul.Tensor %802, %808 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%810 = torch.aten.add.Tensor %809, %805, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%811 = torch.prim.ListConstruct %int0, %int2, %int3, %int1 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%812 = torch.aten.permute %810, %811 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> | |
%813 = torch.prim.ListConstruct %int2, %int4096, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%814 = torch.aten.view %812, %813 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%815 = torch.aten.transpose.int %667, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%816 = torch.aten.broadcast_to %814, %813 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%817 = torch.aten.view %816, %813 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%818 = torch.prim.ListConstruct %int2, %int320, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%819 = torch.aten.broadcast_to %815, %818 : !torch.vtensor<[320,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,320],f16> | |
%820 = torch.aten.view %819, %818 : !torch.vtensor<[2,320,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,320],f16> | |
%821 = torch.aten.bmm %817, %820 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,320,320],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%822 = torch.aten.view %821, %813 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%823 = torch.aten.add.Tensor %822, %666, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%824 = torch.aten.to.dtype %823, %int7, %false, %false, %none : !torch.vtensor<[2,4096,320],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,320],f64> | |
%825 = torch.aten.sum.dim_IntList %824, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%826 = torch.aten.div.Scalar %825, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%827 = torch.aten.sub.Tensor %824, %826, %float1.000000e00 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,1],f64>, !torch.float -> !torch.vtensor<[2,4096,320],f64> | |
%828 = torch.aten.mul.Tensor %827, %827 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,320],f64> -> !torch.vtensor<[2,4096,320],f64> | |
%829 = torch.aten.sum.dim_IntList %828, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%830 = torch.aten.div.Scalar %829, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%831 = torch.aten.to.dtype %830, %int5, %false, %false, %none : !torch.vtensor<[2,4096,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%832 = torch.aten.sum.dim_IntList %823, %688, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%833 = torch.aten.div.Scalar %832, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%834 = torch.aten.add.Scalar %831, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%835 = torch.aten.rsqrt %834 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> | |
%836 = torch.aten.sub.Tensor %823, %833, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%837 = torch.aten.mul.Tensor %836, %835 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%838 = torch.aten.mul.Tensor %837, %665 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%839 = torch.aten.add.Tensor %838, %664, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%840 = torch.aten.transpose.int %663, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%841 = torch.prim.ListConstruct %int8192, %int320 : (!torch.int, !torch.int) -> !torch.list<int> | |
%842 = torch.aten.view %839, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%843 = torch.aten.mm %842, %840 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%844 = torch.aten.view %843, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%845 = torch.prim.ListConstruct %int2, %int4096, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%846 = torch.aten.view %844, %845 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%847 = torch.prim.ListConstruct %int0, %int2, %int1, %int3 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%848 = torch.aten.permute %846, %847 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%849 = torch.aten.clone %848, %int0 : !torch.vtensor<[2,5,4096,64],f16>, !torch.int -> !torch.vtensor<[2,5,4096,64],f16> | |
%850 = torch.prim.ListConstruct %int10, %int4096, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%851 = torch.aten.view %849, %850 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[10,4096,64],f16> | |
%852 = torch.aten.transpose.int %662, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%853 = torch.aten.view %839, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%854 = torch.aten.mm %853, %852 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%855 = torch.aten.view %854, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%856 = torch.aten.transpose.int %661, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%857 = torch.aten.view %839, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%858 = torch.aten.mm %857, %856 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%859 = torch.aten.view %858, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%860 = torch.aten.view %855, %845 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%861 = torch.aten.permute %860, %847 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%862 = torch.aten.clone %861, %int0 : !torch.vtensor<[2,5,4096,64],f16>, !torch.int -> !torch.vtensor<[2,5,4096,64],f16> | |
%863 = torch.aten.view %862, %850 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[10,4096,64],f16> | |
%864 = torch.aten.view %859, %845 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%865 = torch.aten.permute %864, %847 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%866 = torch.aten.clone %865, %int0 : !torch.vtensor<[2,5,4096,64],f16>, !torch.int -> !torch.vtensor<[2,5,4096,64],f16> | |
%867 = torch.aten.view %866, %850 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[10,4096,64],f16> | |
%868 = torch.prim.ListConstruct %int10, %int4096, %int4096 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%869 = torch.aten.empty.memory_format %868, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,4096],f32> | |
%870 = torch.aten.transpose.int %863, %int-1, %int-2 : !torch.vtensor<[10,4096,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[10,64,4096],f16> | |
%871 = torch.aten.bmm %851, %870 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,64,4096],f16> -> !torch.vtensor<[10,4096,4096],f16> | |
%872 = torch.aten.mul.Scalar %871, %float1.250000e-01 : !torch.vtensor<[10,4096,4096],f16>, !torch.float -> !torch.vtensor<[10,4096,4096],f16> | |
%873 = torch.aten.to.dtype %869, %int5, %false, %false, %none : !torch.vtensor<[10,4096,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,4096],f16> | |
%874 = torch.aten.add.Tensor %872, %869, %int0 : !torch.vtensor<[10,4096,4096],f16>, !torch.vtensor<[10,4096,4096],f32>, !torch.int -> !torch.vtensor<[10,4096,4096],f16> | |
%values, %indices = torch.aten.max.dim %874, %int-1, %true : !torch.vtensor<[10,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,4096,1],f16>, !torch.vtensor<[10,4096,1],si64> | |
%875 = torch.aten.sub.Tensor %874, %values, %float1.000000e00 : !torch.vtensor<[10,4096,4096],f16>, !torch.vtensor<[10,4096,1],f16>, !torch.float -> !torch.vtensor<[10,4096,4096],f16> | |
%876 = torch.aten.exp %875 : !torch.vtensor<[10,4096,4096],f16> -> !torch.vtensor<[10,4096,4096],f16> | |
%877 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%878 = torch.aten.sum.dim_IntList %876, %877, %true, %none : !torch.vtensor<[10,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,1],f16> | |
%879 = torch.aten.div.Tensor %876, %878 : !torch.vtensor<[10,4096,4096],f16>, !torch.vtensor<[10,4096,1],f16> -> !torch.vtensor<[10,4096,4096],f16> | |
%880 = torch.aten.bmm %879, %867 : !torch.vtensor<[10,4096,4096],f16>, !torch.vtensor<[10,4096,64],f16> -> !torch.vtensor<[10,4096,64],f16> | |
%881 = torch.prim.ListConstruct %int2, %int5, %int4096, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%882 = torch.aten.view %880, %881 : !torch.vtensor<[10,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%883 = torch.aten.permute %882, %847 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%884 = torch.aten.clone %883, %int0 : !torch.vtensor<[2,4096,5,64],f16>, !torch.int -> !torch.vtensor<[2,4096,5,64],f16> | |
%885 = torch.aten.view %884, %813 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%886 = torch.aten.transpose.int %660, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%887 = torch.aten.view %885, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%888 = torch.aten.mm %887, %886 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%889 = torch.aten.mul.Scalar %659, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> | |
%890 = torch.aten.add.Tensor %889, %888, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> | |
%891 = torch.aten.view %890, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%892 = torch.aten.add.Tensor %891, %823, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%893 = torch.aten.to.dtype %892, %int7, %false, %false, %none : !torch.vtensor<[2,4096,320],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,320],f64> | |
%894 = torch.aten.sum.dim_IntList %893, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%895 = torch.aten.div.Scalar %894, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%896 = torch.aten.sub.Tensor %893, %895, %float1.000000e00 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,1],f64>, !torch.float -> !torch.vtensor<[2,4096,320],f64> | |
%897 = torch.aten.mul.Tensor %896, %896 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,320],f64> -> !torch.vtensor<[2,4096,320],f64> | |
%898 = torch.aten.sum.dim_IntList %897, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%899 = torch.aten.div.Scalar %898, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%900 = torch.aten.to.dtype %899, %int5, %false, %false, %none : !torch.vtensor<[2,4096,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%901 = torch.aten.sum.dim_IntList %892, %688, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%902 = torch.aten.div.Scalar %901, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%903 = torch.aten.add.Scalar %900, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%904 = torch.aten.rsqrt %903 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> | |
%905 = torch.aten.sub.Tensor %892, %902, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%906 = torch.aten.mul.Tensor %905, %904 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%907 = torch.aten.mul.Tensor %906, %658 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%908 = torch.aten.add.Tensor %907, %657, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%909 = torch.aten.transpose.int %656, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%910 = torch.aten.view %908, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%911 = torch.aten.mm %910, %909 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%912 = torch.aten.view %911, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%913 = torch.aten.view %912, %845 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%914 = torch.aten.permute %913, %847 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%915 = torch.aten.clone %914, %int0 : !torch.vtensor<[2,5,4096,64],f16>, !torch.int -> !torch.vtensor<[2,5,4096,64],f16> | |
%916 = torch.aten.view %915, %850 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[10,4096,64],f16> | |
%917 = torch.aten.transpose.int %655, %int0, %int1 : !torch.vtensor<[320,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,320],f16> | |
%918 = torch.prim.ListConstruct %int128, %int1024 : (!torch.int, !torch.int) -> !torch.list<int> | |
%919 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%920 = torch.aten.mm %919, %917 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[128,320],f16> | |
%921 = torch.prim.ListConstruct %int2, %int64, %int320 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%922 = torch.aten.view %920, %921 : !torch.vtensor<[128,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,320],f16> | |
%923 = torch.aten.transpose.int %654, %int0, %int1 : !torch.vtensor<[320,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,320],f16> | |
%924 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%925 = torch.aten.mm %924, %923 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[128,320],f16> | |
%926 = torch.aten.view %925, %921 : !torch.vtensor<[128,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,320],f16> | |
%927 = torch.prim.ListConstruct %int2, %int64, %int5, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%928 = torch.aten.view %922, %927 : !torch.vtensor<[2,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,5,64],f16> | |
%929 = torch.aten.permute %928, %847 : !torch.vtensor<[2,64,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,64,64],f16> | |
%930 = torch.aten.clone %929, %int0 : !torch.vtensor<[2,5,64,64],f16>, !torch.int -> !torch.vtensor<[2,5,64,64],f16> | |
%931 = torch.prim.ListConstruct %int10, %int64, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%932 = torch.aten.view %930, %931 : !torch.vtensor<[2,5,64,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,64],f16> | |
%933 = torch.aten.view %926, %927 : !torch.vtensor<[2,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,5,64],f16> | |
%934 = torch.aten.permute %933, %847 : !torch.vtensor<[2,64,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,64,64],f16> | |
%935 = torch.aten.clone %934, %int0 : !torch.vtensor<[2,5,64,64],f16>, !torch.int -> !torch.vtensor<[2,5,64,64],f16> | |
%936 = torch.aten.view %935, %931 : !torch.vtensor<[2,5,64,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,64],f16> | |
%937 = torch.aten.empty.memory_format %850, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,64],f32> | |
%938 = torch.aten.transpose.int %932, %int-1, %int-2 : !torch.vtensor<[10,64,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[10,64,64],f16> | |
%939 = torch.aten.bmm %916, %938 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,64,64],f16> -> !torch.vtensor<[10,4096,64],f16> | |
%940 = torch.aten.mul.Scalar %939, %float1.250000e-01 : !torch.vtensor<[10,4096,64],f16>, !torch.float -> !torch.vtensor<[10,4096,64],f16> | |
%941 = torch.aten.to.dtype %937, %int5, %false, %false, %none : !torch.vtensor<[10,4096,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,64],f16> | |
%942 = torch.aten.add.Tensor %940, %937, %int0 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,4096,64],f32>, !torch.int -> !torch.vtensor<[10,4096,64],f16> | |
%values_0, %indices_1 = torch.aten.max.dim %942, %int-1, %true : !torch.vtensor<[10,4096,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,4096,1],f16>, !torch.vtensor<[10,4096,1],si64> | |
%943 = torch.aten.sub.Tensor %942, %values_0, %float1.000000e00 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,4096,1],f16>, !torch.float -> !torch.vtensor<[10,4096,64],f16> | |
%944 = torch.aten.exp %943 : !torch.vtensor<[10,4096,64],f16> -> !torch.vtensor<[10,4096,64],f16> | |
%945 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%946 = torch.aten.sum.dim_IntList %944, %945, %true, %none : !torch.vtensor<[10,4096,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,1],f16> | |
%947 = torch.aten.div.Tensor %944, %946 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,4096,1],f16> -> !torch.vtensor<[10,4096,64],f16> | |
%948 = torch.aten.bmm %947, %936 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,64,64],f16> -> !torch.vtensor<[10,4096,64],f16> | |
%949 = torch.aten.view %948, %881 : !torch.vtensor<[10,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%950 = torch.aten.permute %949, %847 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%951 = torch.aten.clone %950, %int0 : !torch.vtensor<[2,4096,5,64],f16>, !torch.int -> !torch.vtensor<[2,4096,5,64],f16> | |
%952 = torch.aten.view %951, %813 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%953 = torch.aten.transpose.int %653, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%954 = torch.aten.view %952, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%955 = torch.aten.mm %954, %953 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%956 = torch.aten.mul.Scalar %652, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> | |
%957 = torch.aten.add.Tensor %956, %955, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> | |
%958 = torch.aten.view %957, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%959 = torch.aten.add.Tensor %958, %892, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%960 = torch.aten.to.dtype %959, %int7, %false, %false, %none : !torch.vtensor<[2,4096,320],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,320],f64> | |
%961 = torch.aten.sum.dim_IntList %960, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%962 = torch.aten.div.Scalar %961, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%963 = torch.aten.sub.Tensor %960, %962, %float1.000000e00 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,1],f64>, !torch.float -> !torch.vtensor<[2,4096,320],f64> | |
%964 = torch.aten.mul.Tensor %963, %963 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,320],f64> -> !torch.vtensor<[2,4096,320],f64> | |
%965 = torch.aten.sum.dim_IntList %964, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%966 = torch.aten.div.Scalar %965, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%967 = torch.aten.to.dtype %966, %int5, %false, %false, %none : !torch.vtensor<[2,4096,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%968 = torch.aten.sum.dim_IntList %959, %688, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%969 = torch.aten.div.Scalar %968, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%970 = torch.aten.add.Scalar %967, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%971 = torch.aten.rsqrt %970 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> | |
%972 = torch.aten.sub.Tensor %959, %969, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%973 = torch.aten.mul.Tensor %972, %971 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%974 = torch.aten.mul.Tensor %973, %651 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%975 = torch.aten.add.Tensor %974, %650, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%976 = torch.aten.transpose.int %649, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16> | |
%977 = torch.aten.view %975, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%978 = torch.aten.mm %977, %976 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16> | |
%979 = torch.aten.mul.Scalar %648, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16> | |
%980 = torch.aten.add.Tensor %979, %978, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16> | |
%981 = torch.prim.ListConstruct %int2, %int4096, %int2560 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%982 = torch.aten.view %980, %981 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16> | |
%983 = torch.aten.slice.Tensor %982, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> | |
%984 = torch.aten.slice.Tensor %982, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> | |
%985 = torch.aten.gelu %984, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16> | |
%986 = torch.aten.mul.Tensor %983, %985 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16> | |
%987 = torch.aten.transpose.int %647, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> | |
%988 = torch.prim.ListConstruct %int8192, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> | |
%989 = torch.aten.view %986, %988 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16> | |
%990 = torch.aten.mm %989, %987 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%991 = torch.aten.mul.Scalar %646, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> | |
%992 = torch.aten.add.Tensor %991, %990, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> | |
%993 = torch.aten.view %992, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%994 = torch.aten.add.Tensor %993, %959, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%995 = torch.aten.transpose.int %645, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%996 = torch.aten.view %994, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%997 = torch.aten.mm %996, %995 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%998 = torch.aten.mul.Scalar %644, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> | |
%999 = torch.aten.add.Tensor %998, %997, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> | |
%1000 = torch.aten.view %999, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1001 = torch.prim.ListConstruct %int2, %int64, %int64, %int320 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1002 = torch.aten.view %1000, %1001 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> | |
%1003 = torch.prim.ListConstruct %int0, %int3, %int1, %int2 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1004 = torch.aten.permute %1002, %1003 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> | |
%1005 = torch.aten.clone %1004, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1006 = torch.aten.add.Tensor %1005, %786, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1007 = torch.aten.view %1006, %719 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> | |
%1008 = torch.aten.to.dtype %1007, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> | |
%1009 = torch.aten.sum.dim_IntList %1008, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1010 = torch.aten.div.Scalar %1009, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1011 = torch.aten.sub.Tensor %1008, %1010, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> | |
%1012 = torch.aten.mul.Tensor %1011, %1011 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> | |
%1013 = torch.aten.sum.dim_IntList %1012, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1014 = torch.aten.div.Scalar %1013, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1015 = torch.aten.to.dtype %1014, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1016 = torch.aten.sum.dim_IntList %1007, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1017 = torch.aten.div.Scalar %1016, %int40960 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1018 = torch.aten.add.Scalar %1015, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1019 = torch.aten.rsqrt %1018 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1020 = torch.aten.sub.Tensor %1007, %1017, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,10,4096],f16> | |
%1021 = torch.aten.mul.Tensor %1020, %1019 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,10,4096],f16> | |
%1022 = torch.aten.view %1021, %736 : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> | |
%1023 = torch.aten.unsqueeze %643, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%1024 = torch.aten.unsqueeze %1023, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%1025 = torch.aten.unsqueeze %1024, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%1026 = torch.aten.unsqueeze %642, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%1027 = torch.aten.unsqueeze %1026, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%1028 = torch.aten.unsqueeze %1027, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%1029 = torch.aten.mul.Tensor %1022, %1028 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%1030 = torch.aten.add.Tensor %1029, %1025, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1031 = torch.aten.sigmoid %1030 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%1032 = torch.aten.mul.Tensor %1031, %1030 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%1033 = torch.aten.convolution %1032, %641, %640, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1034 = torch.aten.sigmoid %715 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%1035 = torch.aten.mul.Tensor %1034, %715 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%1036 = torch.aten.transpose.int %639, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> | |
%1037 = torch.aten.mm %1035, %1036 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[2,320],f16> | |
%1038 = torch.aten.mul.Scalar %638, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> | |
%1039 = torch.aten.add.Tensor %1038, %1037, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320],f16> | |
%1040 = torch.aten.unsqueeze %1039, %int2 : !torch.vtensor<[2,320],f16>, !torch.int -> !torch.vtensor<[2,320,1],f16> | |
%1041 = torch.aten.unsqueeze %1040, %int3 : !torch.vtensor<[2,320,1],f16>, !torch.int -> !torch.vtensor<[2,320,1,1],f16> | |
%1042 = torch.aten.add.Tensor %1033, %1041, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1043 = torch.aten.view %1042, %719 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> | |
%1044 = torch.aten.to.dtype %1043, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> | |
%1045 = torch.aten.sum.dim_IntList %1044, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1046 = torch.aten.div.Scalar %1045, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1047 = torch.aten.sub.Tensor %1044, %1046, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> | |
%1048 = torch.aten.mul.Tensor %1047, %1047 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> | |
%1049 = torch.aten.sum.dim_IntList %1048, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1050 = torch.aten.div.Scalar %1049, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1051 = torch.aten.to.dtype %1050, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1052 = torch.aten.sum.dim_IntList %1043, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1053 = torch.aten.div.Scalar %1052, %int40960 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1054 = torch.aten.add.Scalar %1051, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1055 = torch.aten.rsqrt %1054 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1056 = torch.aten.sub.Tensor %1043, %1053, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,10,4096],f16> | |
%1057 = torch.aten.mul.Tensor %1056, %1055 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,10,4096],f16> | |
%1058 = torch.aten.view %1057, %736 : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> | |
%1059 = torch.aten.unsqueeze %637, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%1060 = torch.aten.unsqueeze %1059, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%1061 = torch.aten.unsqueeze %1060, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%1062 = torch.aten.unsqueeze %636, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%1063 = torch.aten.unsqueeze %1062, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%1064 = torch.aten.unsqueeze %1063, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%1065 = torch.aten.mul.Tensor %1058, %1064 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%1066 = torch.aten.add.Tensor %1065, %1061, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1067 = torch.aten.sigmoid %1066 : !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%1068 = torch.aten.mul.Tensor %1067, %1066 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%1069 = torch.aten.convolution %1068, %635, %634, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1070 = torch.aten.add.Tensor %1006, %1069, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1071 = torch.aten.div.Scalar %1070, %float1.000000e00 : !torch.vtensor<[2,320,64,64],f16>, !torch.float -> !torch.vtensor<[2,320,64,64],f16> | |
%1072 = torch.aten.view %1071, %719 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,4096],f16> | |
%1073 = torch.aten.to.dtype %1072, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,4096],f64> | |
%1074 = torch.aten.sum.dim_IntList %1073, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1075 = torch.aten.div.Scalar %1074, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1076 = torch.aten.sub.Tensor %1073, %1075, %float1.000000e00 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,4096],f64> | |
%1077 = torch.aten.mul.Tensor %1076, %1076 : !torch.vtensor<[2,32,10,4096],f64>, !torch.vtensor<[2,32,10,4096],f64> -> !torch.vtensor<[2,32,10,4096],f64> | |
%1078 = torch.aten.sum.dim_IntList %1077, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1079 = torch.aten.div.Scalar %1078, %int40960 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1080 = torch.aten.to.dtype %1079, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1081 = torch.aten.sum.dim_IntList %1072, %721, %true, %none : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1082 = torch.aten.div.Scalar %1081, %int40960 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1083 = torch.aten.add.Scalar %1080, %float9.999990e-07, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1084 = torch.aten.rsqrt %1083 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1085 = torch.aten.sub.Tensor %1072, %1082, %int1 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,10,4096],f16> | |
%1086 = torch.aten.mul.Tensor %1085, %1084 : !torch.vtensor<[2,32,10,4096],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,10,4096],f16> | |
%1087 = torch.aten.view %1086, %736 : !torch.vtensor<[2,32,10,4096],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> | |
%1088 = torch.aten.unsqueeze %633, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%1089 = torch.aten.unsqueeze %1088, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%1090 = torch.aten.unsqueeze %1089, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%1091 = torch.aten.unsqueeze %632, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%1092 = torch.aten.unsqueeze %1091, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%1093 = torch.aten.unsqueeze %1092, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%1094 = torch.aten.mul.Tensor %1087, %1093 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[2,320,64,64],f16> | |
%1095 = torch.aten.add.Tensor %1094, %1090, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1096 = torch.aten.permute %1095, %811 : !torch.vtensor<[2,320,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> | |
%1097 = torch.aten.view %1096, %813 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1098 = torch.aten.transpose.int %631, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%1099 = torch.aten.broadcast_to %1097, %813 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1100 = torch.aten.view %1099, %813 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1101 = torch.aten.broadcast_to %1098, %818 : !torch.vtensor<[320,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,320],f16> | |
%1102 = torch.aten.view %1101, %818 : !torch.vtensor<[2,320,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,320],f16> | |
%1103 = torch.aten.bmm %1100, %1102 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,320,320],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%1104 = torch.aten.view %1103, %813 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1105 = torch.aten.add.Tensor %1104, %630, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%1106 = torch.aten.to.dtype %1105, %int7, %false, %false, %none : !torch.vtensor<[2,4096,320],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,320],f64> | |
%1107 = torch.aten.sum.dim_IntList %1106, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%1108 = torch.aten.div.Scalar %1107, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%1109 = torch.aten.sub.Tensor %1106, %1108, %float1.000000e00 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,1],f64>, !torch.float -> !torch.vtensor<[2,4096,320],f64> | |
%1110 = torch.aten.mul.Tensor %1109, %1109 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,320],f64> -> !torch.vtensor<[2,4096,320],f64> | |
%1111 = torch.aten.sum.dim_IntList %1110, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%1112 = torch.aten.div.Scalar %1111, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%1113 = torch.aten.to.dtype %1112, %int5, %false, %false, %none : !torch.vtensor<[2,4096,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%1114 = torch.aten.sum.dim_IntList %1105, %688, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%1115 = torch.aten.div.Scalar %1114, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%1116 = torch.aten.add.Scalar %1113, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%1117 = torch.aten.rsqrt %1116 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> | |
%1118 = torch.aten.sub.Tensor %1105, %1115, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%1119 = torch.aten.mul.Tensor %1118, %1117 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%1120 = torch.aten.mul.Tensor %1119, %629 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%1121 = torch.aten.add.Tensor %1120, %628, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%1122 = torch.aten.transpose.int %627, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%1123 = torch.aten.view %1121, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%1124 = torch.aten.mm %1123, %1122 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%1125 = torch.aten.view %1124, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1126 = torch.aten.view %1125, %845 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%1127 = torch.aten.permute %1126, %847 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%1128 = torch.aten.clone %1127, %int0 : !torch.vtensor<[2,5,4096,64],f16>, !torch.int -> !torch.vtensor<[2,5,4096,64],f16> | |
%1129 = torch.aten.view %1128, %850 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[10,4096,64],f16> | |
%1130 = torch.aten.transpose.int %626, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%1131 = torch.aten.view %1121, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%1132 = torch.aten.mm %1131, %1130 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%1133 = torch.aten.view %1132, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1134 = torch.aten.transpose.int %625, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%1135 = torch.aten.view %1121, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%1136 = torch.aten.mm %1135, %1134 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%1137 = torch.aten.view %1136, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1138 = torch.aten.view %1133, %845 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%1139 = torch.aten.permute %1138, %847 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%1140 = torch.aten.clone %1139, %int0 : !torch.vtensor<[2,5,4096,64],f16>, !torch.int -> !torch.vtensor<[2,5,4096,64],f16> | |
%1141 = torch.aten.view %1140, %850 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[10,4096,64],f16> | |
%1142 = torch.aten.view %1137, %845 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%1143 = torch.aten.permute %1142, %847 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%1144 = torch.aten.clone %1143, %int0 : !torch.vtensor<[2,5,4096,64],f16>, !torch.int -> !torch.vtensor<[2,5,4096,64],f16> | |
%1145 = torch.aten.view %1144, %850 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[10,4096,64],f16> | |
%1146 = torch.aten.empty.memory_format %868, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,4096],f32> | |
%1147 = torch.aten.transpose.int %1141, %int-1, %int-2 : !torch.vtensor<[10,4096,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[10,64,4096],f16> | |
%1148 = torch.aten.bmm %1129, %1147 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,64,4096],f16> -> !torch.vtensor<[10,4096,4096],f16> | |
%1149 = torch.aten.mul.Scalar %1148, %float1.250000e-01 : !torch.vtensor<[10,4096,4096],f16>, !torch.float -> !torch.vtensor<[10,4096,4096],f16> | |
%1150 = torch.aten.to.dtype %1146, %int5, %false, %false, %none : !torch.vtensor<[10,4096,4096],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,4096],f16> | |
%1151 = torch.aten.add.Tensor %1149, %1146, %int0 : !torch.vtensor<[10,4096,4096],f16>, !torch.vtensor<[10,4096,4096],f32>, !torch.int -> !torch.vtensor<[10,4096,4096],f16> | |
%values_2, %indices_3 = torch.aten.max.dim %1151, %int-1, %true : !torch.vtensor<[10,4096,4096],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,4096,1],f16>, !torch.vtensor<[10,4096,1],si64> | |
%1152 = torch.aten.sub.Tensor %1151, %values_2, %float1.000000e00 : !torch.vtensor<[10,4096,4096],f16>, !torch.vtensor<[10,4096,1],f16>, !torch.float -> !torch.vtensor<[10,4096,4096],f16> | |
%1153 = torch.aten.exp %1152 : !torch.vtensor<[10,4096,4096],f16> -> !torch.vtensor<[10,4096,4096],f16> | |
%1154 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%1155 = torch.aten.sum.dim_IntList %1153, %1154, %true, %none : !torch.vtensor<[10,4096,4096],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,1],f16> | |
%1156 = torch.aten.div.Tensor %1153, %1155 : !torch.vtensor<[10,4096,4096],f16>, !torch.vtensor<[10,4096,1],f16> -> !torch.vtensor<[10,4096,4096],f16> | |
%1157 = torch.aten.bmm %1156, %1145 : !torch.vtensor<[10,4096,4096],f16>, !torch.vtensor<[10,4096,64],f16> -> !torch.vtensor<[10,4096,64],f16> | |
%1158 = torch.aten.view %1157, %881 : !torch.vtensor<[10,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%1159 = torch.aten.permute %1158, %847 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%1160 = torch.aten.clone %1159, %int0 : !torch.vtensor<[2,4096,5,64],f16>, !torch.int -> !torch.vtensor<[2,4096,5,64],f16> | |
%1161 = torch.aten.view %1160, %813 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1162 = torch.aten.transpose.int %624, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%1163 = torch.aten.view %1161, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%1164 = torch.aten.mm %1163, %1162 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%1165 = torch.aten.mul.Scalar %623, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> | |
%1166 = torch.aten.add.Tensor %1165, %1164, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> | |
%1167 = torch.aten.view %1166, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1168 = torch.aten.add.Tensor %1167, %1105, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%1169 = torch.aten.to.dtype %1168, %int7, %false, %false, %none : !torch.vtensor<[2,4096,320],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,320],f64> | |
%1170 = torch.aten.sum.dim_IntList %1169, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%1171 = torch.aten.div.Scalar %1170, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%1172 = torch.aten.sub.Tensor %1169, %1171, %float1.000000e00 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,1],f64>, !torch.float -> !torch.vtensor<[2,4096,320],f64> | |
%1173 = torch.aten.mul.Tensor %1172, %1172 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,320],f64> -> !torch.vtensor<[2,4096,320],f64> | |
%1174 = torch.aten.sum.dim_IntList %1173, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%1175 = torch.aten.div.Scalar %1174, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%1176 = torch.aten.to.dtype %1175, %int5, %false, %false, %none : !torch.vtensor<[2,4096,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%1177 = torch.aten.sum.dim_IntList %1168, %688, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%1178 = torch.aten.div.Scalar %1177, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%1179 = torch.aten.add.Scalar %1176, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%1180 = torch.aten.rsqrt %1179 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> | |
%1181 = torch.aten.sub.Tensor %1168, %1178, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%1182 = torch.aten.mul.Tensor %1181, %1180 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%1183 = torch.aten.mul.Tensor %1182, %622 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%1184 = torch.aten.add.Tensor %1183, %621, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%1185 = torch.aten.transpose.int %620, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%1186 = torch.aten.view %1184, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%1187 = torch.aten.mm %1186, %1185 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%1188 = torch.aten.view %1187, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1189 = torch.aten.view %1188, %845 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%1190 = torch.aten.permute %1189, %847 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%1191 = torch.aten.clone %1190, %int0 : !torch.vtensor<[2,5,4096,64],f16>, !torch.int -> !torch.vtensor<[2,5,4096,64],f16> | |
%1192 = torch.aten.view %1191, %850 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[10,4096,64],f16> | |
%1193 = torch.aten.transpose.int %619, %int0, %int1 : !torch.vtensor<[320,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,320],f16> | |
%1194 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%1195 = torch.aten.mm %1194, %1193 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[128,320],f16> | |
%1196 = torch.aten.view %1195, %921 : !torch.vtensor<[128,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,320],f16> | |
%1197 = torch.aten.transpose.int %618, %int0, %int1 : !torch.vtensor<[320,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,320],f16> | |
%1198 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%1199 = torch.aten.mm %1198, %1197 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,320],f16> -> !torch.vtensor<[128,320],f16> | |
%1200 = torch.aten.view %1199, %921 : !torch.vtensor<[128,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,320],f16> | |
%1201 = torch.aten.view %1196, %927 : !torch.vtensor<[2,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,5,64],f16> | |
%1202 = torch.aten.permute %1201, %847 : !torch.vtensor<[2,64,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,64,64],f16> | |
%1203 = torch.aten.clone %1202, %int0 : !torch.vtensor<[2,5,64,64],f16>, !torch.int -> !torch.vtensor<[2,5,64,64],f16> | |
%1204 = torch.aten.view %1203, %931 : !torch.vtensor<[2,5,64,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,64],f16> | |
%1205 = torch.aten.view %1200, %927 : !torch.vtensor<[2,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,5,64],f16> | |
%1206 = torch.aten.permute %1205, %847 : !torch.vtensor<[2,64,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,64,64],f16> | |
%1207 = torch.aten.clone %1206, %int0 : !torch.vtensor<[2,5,64,64],f16>, !torch.int -> !torch.vtensor<[2,5,64,64],f16> | |
%1208 = torch.aten.view %1207, %931 : !torch.vtensor<[2,5,64,64],f16>, !torch.list<int> -> !torch.vtensor<[10,64,64],f16> | |
%1209 = torch.aten.empty.memory_format %850, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,64],f32> | |
%1210 = torch.aten.transpose.int %1204, %int-1, %int-2 : !torch.vtensor<[10,64,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[10,64,64],f16> | |
%1211 = torch.aten.bmm %1192, %1210 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,64,64],f16> -> !torch.vtensor<[10,4096,64],f16> | |
%1212 = torch.aten.mul.Scalar %1211, %float1.250000e-01 : !torch.vtensor<[10,4096,64],f16>, !torch.float -> !torch.vtensor<[10,4096,64],f16> | |
%1213 = torch.aten.to.dtype %1209, %int5, %false, %false, %none : !torch.vtensor<[10,4096,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,64],f16> | |
%1214 = torch.aten.add.Tensor %1212, %1209, %int0 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,4096,64],f32>, !torch.int -> !torch.vtensor<[10,4096,64],f16> | |
%values_4, %indices_5 = torch.aten.max.dim %1214, %int-1, %true : !torch.vtensor<[10,4096,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[10,4096,1],f16>, !torch.vtensor<[10,4096,1],si64> | |
%1215 = torch.aten.sub.Tensor %1214, %values_4, %float1.000000e00 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,4096,1],f16>, !torch.float -> !torch.vtensor<[10,4096,64],f16> | |
%1216 = torch.aten.exp %1215 : !torch.vtensor<[10,4096,64],f16> -> !torch.vtensor<[10,4096,64],f16> | |
%1217 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%1218 = torch.aten.sum.dim_IntList %1216, %1217, %true, %none : !torch.vtensor<[10,4096,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[10,4096,1],f16> | |
%1219 = torch.aten.div.Tensor %1216, %1218 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,4096,1],f16> -> !torch.vtensor<[10,4096,64],f16> | |
%1220 = torch.aten.bmm %1219, %1208 : !torch.vtensor<[10,4096,64],f16>, !torch.vtensor<[10,64,64],f16> -> !torch.vtensor<[10,4096,64],f16> | |
%1221 = torch.aten.view %1220, %881 : !torch.vtensor<[10,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[2,5,4096,64],f16> | |
%1222 = torch.aten.permute %1221, %847 : !torch.vtensor<[2,5,4096,64],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,5,64],f16> | |
%1223 = torch.aten.clone %1222, %int0 : !torch.vtensor<[2,4096,5,64],f16>, !torch.int -> !torch.vtensor<[2,4096,5,64],f16> | |
%1224 = torch.aten.view %1223, %813 : !torch.vtensor<[2,4096,5,64],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1225 = torch.aten.transpose.int %617, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%1226 = torch.aten.view %1224, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%1227 = torch.aten.mm %1226, %1225 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%1228 = torch.aten.mul.Scalar %616, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> | |
%1229 = torch.aten.add.Tensor %1228, %1227, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> | |
%1230 = torch.aten.view %1229, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1231 = torch.aten.add.Tensor %1230, %1168, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%1232 = torch.aten.to.dtype %1231, %int7, %false, %false, %none : !torch.vtensor<[2,4096,320],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,320],f64> | |
%1233 = torch.aten.sum.dim_IntList %1232, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%1234 = torch.aten.div.Scalar %1233, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%1235 = torch.aten.sub.Tensor %1232, %1234, %float1.000000e00 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,1],f64>, !torch.float -> !torch.vtensor<[2,4096,320],f64> | |
%1236 = torch.aten.mul.Tensor %1235, %1235 : !torch.vtensor<[2,4096,320],f64>, !torch.vtensor<[2,4096,320],f64> -> !torch.vtensor<[2,4096,320],f64> | |
%1237 = torch.aten.sum.dim_IntList %1236, %688, %true, %none : !torch.vtensor<[2,4096,320],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f64> | |
%1238 = torch.aten.div.Scalar %1237, %int320 : !torch.vtensor<[2,4096,1],f64>, !torch.int -> !torch.vtensor<[2,4096,1],f64> | |
%1239 = torch.aten.to.dtype %1238, %int5, %false, %false, %none : !torch.vtensor<[2,4096,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%1240 = torch.aten.sum.dim_IntList %1231, %688, %true, %none : !torch.vtensor<[2,4096,320],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,4096,1],f16> | |
%1241 = torch.aten.div.Scalar %1240, %int320 : !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%1242 = torch.aten.add.Scalar %1239, %float1.000000e-05, %int1 : !torch.vtensor<[2,4096,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,4096,1],f16> | |
%1243 = torch.aten.rsqrt %1242 : !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,1],f16> | |
%1244 = torch.aten.sub.Tensor %1231, %1241, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%1245 = torch.aten.mul.Tensor %1244, %1243 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,1],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%1246 = torch.aten.mul.Tensor %1245, %615 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16> -> !torch.vtensor<[2,4096,320],f16> | |
%1247 = torch.aten.add.Tensor %1246, %614, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%1248 = torch.aten.transpose.int %613, %int0, %int1 : !torch.vtensor<[2560,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,2560],f16> | |
%1249 = torch.aten.view %1247, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%1250 = torch.aten.mm %1249, %1248 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,2560],f16> -> !torch.vtensor<[8192,2560],f16> | |
%1251 = torch.aten.mul.Scalar %612, %int1 : !torch.vtensor<[2560],f16>, !torch.int -> !torch.vtensor<[2560],f16> | |
%1252 = torch.aten.add.Tensor %1251, %1250, %int1 : !torch.vtensor<[2560],f16>, !torch.vtensor<[8192,2560],f16>, !torch.int -> !torch.vtensor<[8192,2560],f16> | |
%1253 = torch.aten.view %1252, %981 : !torch.vtensor<[8192,2560],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,2560],f16> | |
%1254 = torch.aten.slice.Tensor %1253, %int-1, %int0, %int1280, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> | |
%1255 = torch.aten.slice.Tensor %1253, %int-1, %int1280, %int2560, %int1 : !torch.vtensor<[2,4096,2560],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,4096,1280],f16> | |
%1256 = torch.aten.gelu %1255, %str : !torch.vtensor<[2,4096,1280],f16>, !torch.str -> !torch.vtensor<[2,4096,1280],f16> | |
%1257 = torch.aten.mul.Tensor %1254, %1256 : !torch.vtensor<[2,4096,1280],f16>, !torch.vtensor<[2,4096,1280],f16> -> !torch.vtensor<[2,4096,1280],f16> | |
%1258 = torch.aten.transpose.int %611, %int0, %int1 : !torch.vtensor<[320,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,320],f16> | |
%1259 = torch.aten.view %1257, %988 : !torch.vtensor<[2,4096,1280],f16>, !torch.list<int> -> !torch.vtensor<[8192,1280],f16> | |
%1260 = torch.aten.mm %1259, %1258 : !torch.vtensor<[8192,1280],f16>, !torch.vtensor<[1280,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%1261 = torch.aten.mul.Scalar %610, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> | |
%1262 = torch.aten.add.Tensor %1261, %1260, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> | |
%1263 = torch.aten.view %1262, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1264 = torch.aten.add.Tensor %1263, %1231, %int1 : !torch.vtensor<[2,4096,320],f16>, !torch.vtensor<[2,4096,320],f16>, !torch.int -> !torch.vtensor<[2,4096,320],f16> | |
%1265 = torch.aten.transpose.int %609, %int0, %int1 : !torch.vtensor<[320,320],f16>, !torch.int, !torch.int -> !torch.vtensor<[320,320],f16> | |
%1266 = torch.aten.view %1264, %841 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[8192,320],f16> | |
%1267 = torch.aten.mm %1266, %1265 : !torch.vtensor<[8192,320],f16>, !torch.vtensor<[320,320],f16> -> !torch.vtensor<[8192,320],f16> | |
%1268 = torch.aten.mul.Scalar %608, %int1 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[320],f16> | |
%1269 = torch.aten.add.Tensor %1268, %1267, %int1 : !torch.vtensor<[320],f16>, !torch.vtensor<[8192,320],f16>, !torch.int -> !torch.vtensor<[8192,320],f16> | |
%1270 = torch.aten.view %1269, %813 : !torch.vtensor<[8192,320],f16>, !torch.list<int> -> !torch.vtensor<[2,4096,320],f16> | |
%1271 = torch.aten.view %1270, %1001 : !torch.vtensor<[2,4096,320],f16>, !torch.list<int> -> !torch.vtensor<[2,64,64,320],f16> | |
%1272 = torch.aten.permute %1271, %1003 : !torch.vtensor<[2,64,64,320],f16>, !torch.list<int> -> !torch.vtensor<[2,320,64,64],f16> | |
%1273 = torch.aten.clone %1272, %int0 : !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1274 = torch.aten.add.Tensor %1273, %1071, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[2,320,64,64],f16>, !torch.int -> !torch.vtensor<[2,320,64,64],f16> | |
%1275 = torch.prim.ListConstruct %int2, %int2 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1276 = torch.aten.convolution %1274, %607, %606, %1275, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,320,64,64],f16>, !torch.vtensor<[320,320,3,3],f16>, !torch.vtensor<[320],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> | |
%1277 = torch.prim.ListConstruct %int2, %int32, %int10, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1278 = torch.aten.view %1276, %1277 : !torch.vtensor<[2,320,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,10,1024],f16> | |
%1279 = torch.aten.to.dtype %1278, %int7, %false, %false, %none : !torch.vtensor<[2,32,10,1024],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,10,1024],f64> | |
%1280 = torch.aten.sum.dim_IntList %1279, %721, %true, %none : !torch.vtensor<[2,32,10,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1281 = torch.aten.div.Scalar %1280, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1282 = torch.aten.sub.Tensor %1279, %1281, %float1.000000e00 : !torch.vtensor<[2,32,10,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,10,1024],f64> | |
%1283 = torch.aten.mul.Tensor %1282, %1282 : !torch.vtensor<[2,32,10,1024],f64>, !torch.vtensor<[2,32,10,1024],f64> -> !torch.vtensor<[2,32,10,1024],f64> | |
%1284 = torch.aten.sum.dim_IntList %1283, %721, %true, %none : !torch.vtensor<[2,32,10,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1285 = torch.aten.div.Scalar %1284, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1286 = torch.aten.to.dtype %1285, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1287 = torch.aten.sum.dim_IntList %1278, %721, %true, %none : !torch.vtensor<[2,32,10,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1288 = torch.aten.div.Scalar %1287, %int10240 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1289 = torch.aten.add.Scalar %1286, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1290 = torch.aten.rsqrt %1289 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1291 = torch.aten.sub.Tensor %1278, %1288, %int1 : !torch.vtensor<[2,32,10,1024],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,10,1024],f16> | |
%1292 = torch.aten.mul.Tensor %1291, %1290 : !torch.vtensor<[2,32,10,1024],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,10,1024],f16> | |
%1293 = torch.prim.ListConstruct %int2, %int320, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1294 = torch.aten.view %1292, %1293 : !torch.vtensor<[2,32,10,1024],f16>, !torch.list<int> -> !torch.vtensor<[2,320,32,32],f16> | |
%1295 = torch.aten.unsqueeze %605, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%1296 = torch.aten.unsqueeze %1295, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%1297 = torch.aten.unsqueeze %1296, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%1298 = torch.aten.unsqueeze %604, %int0 : !torch.vtensor<[320],f16>, !torch.int -> !torch.vtensor<[1,320],f16> | |
%1299 = torch.aten.unsqueeze %1298, %int2 : !torch.vtensor<[1,320],f16>, !torch.int -> !torch.vtensor<[1,320,1],f16> | |
%1300 = torch.aten.unsqueeze %1299, %int3 : !torch.vtensor<[1,320,1],f16>, !torch.int -> !torch.vtensor<[1,320,1,1],f16> | |
%1301 = torch.aten.mul.Tensor %1294, %1300 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[1,320,1,1],f16> -> !torch.vtensor<[2,320,32,32],f16> | |
%1302 = torch.aten.add.Tensor %1301, %1297, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[1,320,1,1],f16>, !torch.int -> !torch.vtensor<[2,320,32,32],f16> | |
%1303 = torch.aten.sigmoid %1302 : !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> | |
%1304 = torch.aten.mul.Tensor %1303, %1302 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[2,320,32,32],f16> -> !torch.vtensor<[2,320,32,32],f16> | |
%1305 = torch.aten.convolution %1304, %603, %602, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1306 = torch.aten.sigmoid %715 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%1307 = torch.aten.mul.Tensor %1306, %715 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%1308 = torch.aten.transpose.int %601, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16> | |
%1309 = torch.aten.mm %1307, %1308 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16> | |
%1310 = torch.aten.mul.Scalar %600, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> | |
%1311 = torch.aten.add.Tensor %1310, %1309, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16> | |
%1312 = torch.aten.unsqueeze %1311, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16> | |
%1313 = torch.aten.unsqueeze %1312, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16> | |
%1314 = torch.aten.add.Tensor %1305, %1313, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1315 = torch.prim.ListConstruct %int2, %int32, %int20, %int1024 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1316 = torch.aten.view %1314, %1315 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> | |
%1317 = torch.aten.to.dtype %1316, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> | |
%1318 = torch.aten.sum.dim_IntList %1317, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1319 = torch.aten.div.Scalar %1318, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1320 = torch.aten.sub.Tensor %1317, %1319, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> | |
%1321 = torch.aten.mul.Tensor %1320, %1320 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> | |
%1322 = torch.aten.sum.dim_IntList %1321, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1323 = torch.aten.div.Scalar %1322, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1324 = torch.aten.to.dtype %1323, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1325 = torch.aten.sum.dim_IntList %1316, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1326 = torch.aten.div.Scalar %1325, %int20480 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1327 = torch.aten.add.Scalar %1324, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1328 = torch.aten.rsqrt %1327 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1329 = torch.aten.sub.Tensor %1316, %1326, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,20,1024],f16> | |
%1330 = torch.aten.mul.Tensor %1329, %1328 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,20,1024],f16> | |
%1331 = torch.prim.ListConstruct %int2, %int640, %int32, %int32 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1332 = torch.aten.view %1330, %1331 : !torch.vtensor<[2,32,20,1024],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> | |
%1333 = torch.aten.unsqueeze %599, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1334 = torch.aten.unsqueeze %1333, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1335 = torch.aten.unsqueeze %1334, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1336 = torch.aten.unsqueeze %598, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1337 = torch.aten.unsqueeze %1336, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1338 = torch.aten.unsqueeze %1337, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1339 = torch.aten.mul.Tensor %1332, %1338 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1340 = torch.aten.add.Tensor %1339, %1335, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1341 = torch.aten.sigmoid %1340 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1342 = torch.aten.mul.Tensor %1341, %1340 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1343 = torch.aten.convolution %1342, %597, %596, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1344 = torch.aten.convolution %1276, %595, %594, %716, %717, %716, %false, %717, %int1 : !torch.vtensor<[2,320,32,32],f16>, !torch.vtensor<[640,320,1,1],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1345 = torch.aten.add.Tensor %1344, %1343, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1346 = torch.aten.div.Scalar %1345, %float1.000000e00 : !torch.vtensor<[2,640,32,32],f16>, !torch.float -> !torch.vtensor<[2,640,32,32],f16> | |
%1347 = torch.aten.view %1346, %1315 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> | |
%1348 = torch.aten.to.dtype %1347, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> | |
%1349 = torch.aten.sum.dim_IntList %1348, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1350 = torch.aten.div.Scalar %1349, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1351 = torch.aten.sub.Tensor %1348, %1350, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> | |
%1352 = torch.aten.mul.Tensor %1351, %1351 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> | |
%1353 = torch.aten.sum.dim_IntList %1352, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1354 = torch.aten.div.Scalar %1353, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1355 = torch.aten.to.dtype %1354, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1356 = torch.aten.sum.dim_IntList %1347, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1357 = torch.aten.div.Scalar %1356, %int20480 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1358 = torch.aten.add.Scalar %1355, %float9.999990e-07, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1359 = torch.aten.rsqrt %1358 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1360 = torch.aten.sub.Tensor %1347, %1357, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,20,1024],f16> | |
%1361 = torch.aten.mul.Tensor %1360, %1359 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,20,1024],f16> | |
%1362 = torch.aten.view %1361, %1331 : !torch.vtensor<[2,32,20,1024],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> | |
%1363 = torch.aten.unsqueeze %593, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1364 = torch.aten.unsqueeze %1363, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1365 = torch.aten.unsqueeze %1364, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1366 = torch.aten.unsqueeze %592, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1367 = torch.aten.unsqueeze %1366, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1368 = torch.aten.unsqueeze %1367, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1369 = torch.aten.mul.Tensor %1362, %1368 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1370 = torch.aten.add.Tensor %1369, %1365, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1371 = torch.aten.permute %1370, %811 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> | |
%1372 = torch.prim.ListConstruct %int2, %int1024, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1373 = torch.aten.view %1371, %1372 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1374 = torch.aten.transpose.int %591, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1375 = torch.aten.broadcast_to %1373, %1372 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1376 = torch.aten.view %1375, %1372 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1377 = torch.prim.ListConstruct %int2, %int640, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1378 = torch.aten.broadcast_to %1374, %1377 : !torch.vtensor<[640,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,640],f16> | |
%1379 = torch.aten.view %1378, %1377 : !torch.vtensor<[2,640,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,640],f16> | |
%1380 = torch.aten.bmm %1376, %1379 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,640,640],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1381 = torch.aten.view %1380, %1372 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1382 = torch.aten.add.Tensor %1381, %590, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1383 = torch.aten.to.dtype %1382, %int7, %false, %false, %none : !torch.vtensor<[2,1024,640],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,640],f64> | |
%1384 = torch.aten.sum.dim_IntList %1383, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1385 = torch.aten.div.Scalar %1384, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1386 = torch.aten.sub.Tensor %1383, %1385, %float1.000000e00 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,1],f64>, !torch.float -> !torch.vtensor<[2,1024,640],f64> | |
%1387 = torch.aten.mul.Tensor %1386, %1386 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,640],f64> -> !torch.vtensor<[2,1024,640],f64> | |
%1388 = torch.aten.sum.dim_IntList %1387, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1389 = torch.aten.div.Scalar %1388, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1390 = torch.aten.to.dtype %1389, %int5, %false, %false, %none : !torch.vtensor<[2,1024,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1391 = torch.aten.sum.dim_IntList %1382, %688, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1392 = torch.aten.div.Scalar %1391, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1393 = torch.aten.add.Scalar %1390, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1394 = torch.aten.rsqrt %1393 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> | |
%1395 = torch.aten.sub.Tensor %1382, %1392, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1396 = torch.aten.mul.Tensor %1395, %1394 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1397 = torch.aten.mul.Tensor %1396, %589 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1398 = torch.aten.add.Tensor %1397, %588, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1399 = torch.aten.transpose.int %587, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1400 = torch.prim.ListConstruct %int2048, %int640 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1401 = torch.aten.view %1398, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1402 = torch.aten.mm %1401, %1399 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1403 = torch.aten.view %1402, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1404 = torch.prim.ListConstruct %int2, %int1024, %int10, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1405 = torch.aten.view %1403, %1404 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1406 = torch.aten.permute %1405, %847 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1407 = torch.aten.clone %1406, %int0 : !torch.vtensor<[2,10,1024,64],f16>, !torch.int -> !torch.vtensor<[2,10,1024,64],f16> | |
%1408 = torch.prim.ListConstruct %int20, %int1024, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1409 = torch.aten.view %1407, %1408 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[20,1024,64],f16> | |
%1410 = torch.aten.transpose.int %586, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1411 = torch.aten.view %1398, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1412 = torch.aten.mm %1411, %1410 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1413 = torch.aten.view %1412, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1414 = torch.aten.transpose.int %585, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1415 = torch.aten.view %1398, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1416 = torch.aten.mm %1415, %1414 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1417 = torch.aten.view %1416, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1418 = torch.aten.view %1413, %1404 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1419 = torch.aten.permute %1418, %847 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1420 = torch.aten.clone %1419, %int0 : !torch.vtensor<[2,10,1024,64],f16>, !torch.int -> !torch.vtensor<[2,10,1024,64],f16> | |
%1421 = torch.aten.view %1420, %1408 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[20,1024,64],f16> | |
%1422 = torch.aten.view %1417, %1404 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1423 = torch.aten.permute %1422, %847 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1424 = torch.aten.clone %1423, %int0 : !torch.vtensor<[2,10,1024,64],f16>, !torch.int -> !torch.vtensor<[2,10,1024,64],f16> | |
%1425 = torch.aten.view %1424, %1408 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[20,1024,64],f16> | |
%1426 = torch.prim.ListConstruct %int20, %int1024, %int1024 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1427 = torch.aten.empty.memory_format %1426, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,1024],f32> | |
%1428 = torch.aten.transpose.int %1421, %int-1, %int-2 : !torch.vtensor<[20,1024,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[20,64,1024],f16> | |
%1429 = torch.aten.bmm %1409, %1428 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,64,1024],f16> -> !torch.vtensor<[20,1024,1024],f16> | |
%1430 = torch.aten.mul.Scalar %1429, %float1.250000e-01 : !torch.vtensor<[20,1024,1024],f16>, !torch.float -> !torch.vtensor<[20,1024,1024],f16> | |
%1431 = torch.aten.to.dtype %1427, %int5, %false, %false, %none : !torch.vtensor<[20,1024,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,1024],f16> | |
%1432 = torch.aten.add.Tensor %1430, %1427, %int0 : !torch.vtensor<[20,1024,1024],f16>, !torch.vtensor<[20,1024,1024],f32>, !torch.int -> !torch.vtensor<[20,1024,1024],f16> | |
%values_6, %indices_7 = torch.aten.max.dim %1432, %int-1, %true : !torch.vtensor<[20,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,1024,1],f16>, !torch.vtensor<[20,1024,1],si64> | |
%1433 = torch.aten.sub.Tensor %1432, %values_6, %float1.000000e00 : !torch.vtensor<[20,1024,1024],f16>, !torch.vtensor<[20,1024,1],f16>, !torch.float -> !torch.vtensor<[20,1024,1024],f16> | |
%1434 = torch.aten.exp %1433 : !torch.vtensor<[20,1024,1024],f16> -> !torch.vtensor<[20,1024,1024],f16> | |
%1435 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%1436 = torch.aten.sum.dim_IntList %1434, %1435, %true, %none : !torch.vtensor<[20,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,1],f16> | |
%1437 = torch.aten.div.Tensor %1434, %1436 : !torch.vtensor<[20,1024,1024],f16>, !torch.vtensor<[20,1024,1],f16> -> !torch.vtensor<[20,1024,1024],f16> | |
%1438 = torch.aten.bmm %1437, %1425 : !torch.vtensor<[20,1024,1024],f16>, !torch.vtensor<[20,1024,64],f16> -> !torch.vtensor<[20,1024,64],f16> | |
%1439 = torch.prim.ListConstruct %int2, %int10, %int1024, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1440 = torch.aten.view %1438, %1439 : !torch.vtensor<[20,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1441 = torch.aten.permute %1440, %847 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1442 = torch.aten.clone %1441, %int0 : !torch.vtensor<[2,1024,10,64],f16>, !torch.int -> !torch.vtensor<[2,1024,10,64],f16> | |
%1443 = torch.aten.view %1442, %1372 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1444 = torch.aten.transpose.int %584, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1445 = torch.aten.view %1443, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1446 = torch.aten.mm %1445, %1444 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1447 = torch.aten.mul.Scalar %583, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> | |
%1448 = torch.aten.add.Tensor %1447, %1446, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> | |
%1449 = torch.aten.view %1448, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1450 = torch.aten.add.Tensor %1449, %1382, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1451 = torch.aten.to.dtype %1450, %int7, %false, %false, %none : !torch.vtensor<[2,1024,640],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,640],f64> | |
%1452 = torch.aten.sum.dim_IntList %1451, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1453 = torch.aten.div.Scalar %1452, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1454 = torch.aten.sub.Tensor %1451, %1453, %float1.000000e00 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,1],f64>, !torch.float -> !torch.vtensor<[2,1024,640],f64> | |
%1455 = torch.aten.mul.Tensor %1454, %1454 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,640],f64> -> !torch.vtensor<[2,1024,640],f64> | |
%1456 = torch.aten.sum.dim_IntList %1455, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1457 = torch.aten.div.Scalar %1456, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1458 = torch.aten.to.dtype %1457, %int5, %false, %false, %none : !torch.vtensor<[2,1024,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1459 = torch.aten.sum.dim_IntList %1450, %688, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1460 = torch.aten.div.Scalar %1459, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1461 = torch.aten.add.Scalar %1458, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1462 = torch.aten.rsqrt %1461 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> | |
%1463 = torch.aten.sub.Tensor %1450, %1460, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1464 = torch.aten.mul.Tensor %1463, %1462 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1465 = torch.aten.mul.Tensor %1464, %582 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1466 = torch.aten.add.Tensor %1465, %581, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1467 = torch.aten.transpose.int %580, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1468 = torch.aten.view %1466, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1469 = torch.aten.mm %1468, %1467 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1470 = torch.aten.view %1469, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1471 = torch.aten.view %1470, %1404 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1472 = torch.aten.permute %1471, %847 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1473 = torch.aten.clone %1472, %int0 : !torch.vtensor<[2,10,1024,64],f16>, !torch.int -> !torch.vtensor<[2,10,1024,64],f16> | |
%1474 = torch.aten.view %1473, %1408 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[20,1024,64],f16> | |
%1475 = torch.aten.transpose.int %579, %int0, %int1 : !torch.vtensor<[640,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,640],f16> | |
%1476 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%1477 = torch.aten.mm %1476, %1475 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[128,640],f16> | |
%1478 = torch.prim.ListConstruct %int2, %int64, %int640 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1479 = torch.aten.view %1477, %1478 : !torch.vtensor<[128,640],f16>, !torch.list<int> -> !torch.vtensor<[2,64,640],f16> | |
%1480 = torch.aten.transpose.int %578, %int0, %int1 : !torch.vtensor<[640,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,640],f16> | |
%1481 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%1482 = torch.aten.mm %1481, %1480 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[128,640],f16> | |
%1483 = torch.aten.view %1482, %1478 : !torch.vtensor<[128,640],f16>, !torch.list<int> -> !torch.vtensor<[2,64,640],f16> | |
%1484 = torch.prim.ListConstruct %int2, %int64, %int10, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1485 = torch.aten.view %1479, %1484 : !torch.vtensor<[2,64,640],f16>, !torch.list<int> -> !torch.vtensor<[2,64,10,64],f16> | |
%1486 = torch.aten.permute %1485, %847 : !torch.vtensor<[2,64,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,64,64],f16> | |
%1487 = torch.aten.clone %1486, %int0 : !torch.vtensor<[2,10,64,64],f16>, !torch.int -> !torch.vtensor<[2,10,64,64],f16> | |
%1488 = torch.prim.ListConstruct %int20, %int64, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1489 = torch.aten.view %1487, %1488 : !torch.vtensor<[2,10,64,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,64],f16> | |
%1490 = torch.aten.view %1483, %1484 : !torch.vtensor<[2,64,640],f16>, !torch.list<int> -> !torch.vtensor<[2,64,10,64],f16> | |
%1491 = torch.aten.permute %1490, %847 : !torch.vtensor<[2,64,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,64,64],f16> | |
%1492 = torch.aten.clone %1491, %int0 : !torch.vtensor<[2,10,64,64],f16>, !torch.int -> !torch.vtensor<[2,10,64,64],f16> | |
%1493 = torch.aten.view %1492, %1488 : !torch.vtensor<[2,10,64,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,64],f16> | |
%1494 = torch.aten.empty.memory_format %1408, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,64],f32> | |
%1495 = torch.aten.transpose.int %1489, %int-1, %int-2 : !torch.vtensor<[20,64,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[20,64,64],f16> | |
%1496 = torch.aten.bmm %1474, %1495 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,64,64],f16> -> !torch.vtensor<[20,1024,64],f16> | |
%1497 = torch.aten.mul.Scalar %1496, %float1.250000e-01 : !torch.vtensor<[20,1024,64],f16>, !torch.float -> !torch.vtensor<[20,1024,64],f16> | |
%1498 = torch.aten.to.dtype %1494, %int5, %false, %false, %none : !torch.vtensor<[20,1024,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,64],f16> | |
%1499 = torch.aten.add.Tensor %1497, %1494, %int0 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,1024,64],f32>, !torch.int -> !torch.vtensor<[20,1024,64],f16> | |
%values_8, %indices_9 = torch.aten.max.dim %1499, %int-1, %true : !torch.vtensor<[20,1024,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,1024,1],f16>, !torch.vtensor<[20,1024,1],si64> | |
%1500 = torch.aten.sub.Tensor %1499, %values_8, %float1.000000e00 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,1024,1],f16>, !torch.float -> !torch.vtensor<[20,1024,64],f16> | |
%1501 = torch.aten.exp %1500 : !torch.vtensor<[20,1024,64],f16> -> !torch.vtensor<[20,1024,64],f16> | |
%1502 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%1503 = torch.aten.sum.dim_IntList %1501, %1502, %true, %none : !torch.vtensor<[20,1024,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,1],f16> | |
%1504 = torch.aten.div.Tensor %1501, %1503 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,1024,1],f16> -> !torch.vtensor<[20,1024,64],f16> | |
%1505 = torch.aten.bmm %1504, %1493 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,64,64],f16> -> !torch.vtensor<[20,1024,64],f16> | |
%1506 = torch.aten.view %1505, %1439 : !torch.vtensor<[20,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1507 = torch.aten.permute %1506, %847 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1508 = torch.aten.clone %1507, %int0 : !torch.vtensor<[2,1024,10,64],f16>, !torch.int -> !torch.vtensor<[2,1024,10,64],f16> | |
%1509 = torch.aten.view %1508, %1372 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1510 = torch.aten.transpose.int %577, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1511 = torch.aten.view %1509, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1512 = torch.aten.mm %1511, %1510 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1513 = torch.aten.mul.Scalar %576, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> | |
%1514 = torch.aten.add.Tensor %1513, %1512, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> | |
%1515 = torch.aten.view %1514, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1516 = torch.aten.add.Tensor %1515, %1450, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1517 = torch.aten.to.dtype %1516, %int7, %false, %false, %none : !torch.vtensor<[2,1024,640],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,640],f64> | |
%1518 = torch.aten.sum.dim_IntList %1517, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1519 = torch.aten.div.Scalar %1518, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1520 = torch.aten.sub.Tensor %1517, %1519, %float1.000000e00 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,1],f64>, !torch.float -> !torch.vtensor<[2,1024,640],f64> | |
%1521 = torch.aten.mul.Tensor %1520, %1520 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,640],f64> -> !torch.vtensor<[2,1024,640],f64> | |
%1522 = torch.aten.sum.dim_IntList %1521, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1523 = torch.aten.div.Scalar %1522, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1524 = torch.aten.to.dtype %1523, %int5, %false, %false, %none : !torch.vtensor<[2,1024,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1525 = torch.aten.sum.dim_IntList %1516, %688, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1526 = torch.aten.div.Scalar %1525, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1527 = torch.aten.add.Scalar %1524, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1528 = torch.aten.rsqrt %1527 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> | |
%1529 = torch.aten.sub.Tensor %1516, %1526, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1530 = torch.aten.mul.Tensor %1529, %1528 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1531 = torch.aten.mul.Tensor %1530, %575 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1532 = torch.aten.add.Tensor %1531, %574, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1533 = torch.aten.transpose.int %573, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16> | |
%1534 = torch.aten.view %1532, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1535 = torch.aten.mm %1534, %1533 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16> | |
%1536 = torch.aten.mul.Scalar %572, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16> | |
%1537 = torch.aten.add.Tensor %1536, %1535, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16> | |
%1538 = torch.prim.ListConstruct %int2, %int1024, %int5120 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1539 = torch.aten.view %1537, %1538 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16> | |
%1540 = torch.aten.slice.Tensor %1539, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> | |
%1541 = torch.aten.slice.Tensor %1539, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> | |
%1542 = torch.aten.gelu %1541, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16> | |
%1543 = torch.aten.mul.Tensor %1540, %1542 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16> | |
%1544 = torch.aten.transpose.int %571, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16> | |
%1545 = torch.prim.ListConstruct %int2048, %int2560 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1546 = torch.aten.view %1543, %1545 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16> | |
%1547 = torch.aten.mm %1546, %1544 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1548 = torch.aten.mul.Scalar %570, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> | |
%1549 = torch.aten.add.Tensor %1548, %1547, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> | |
%1550 = torch.aten.view %1549, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1551 = torch.aten.add.Tensor %1550, %1516, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1552 = torch.aten.transpose.int %569, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1553 = torch.aten.view %1551, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1554 = torch.aten.mm %1553, %1552 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1555 = torch.aten.mul.Scalar %568, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> | |
%1556 = torch.aten.add.Tensor %1555, %1554, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> | |
%1557 = torch.aten.view %1556, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1558 = torch.prim.ListConstruct %int2, %int32, %int32, %int640 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1559 = torch.aten.view %1557, %1558 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> | |
%1560 = torch.aten.permute %1559, %1003 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> | |
%1561 = torch.aten.clone %1560, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1562 = torch.aten.add.Tensor %1561, %1346, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1563 = torch.aten.view %1562, %1315 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> | |
%1564 = torch.aten.to.dtype %1563, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> | |
%1565 = torch.aten.sum.dim_IntList %1564, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1566 = torch.aten.div.Scalar %1565, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1567 = torch.aten.sub.Tensor %1564, %1566, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> | |
%1568 = torch.aten.mul.Tensor %1567, %1567 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> | |
%1569 = torch.aten.sum.dim_IntList %1568, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1570 = torch.aten.div.Scalar %1569, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1571 = torch.aten.to.dtype %1570, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1572 = torch.aten.sum.dim_IntList %1563, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1573 = torch.aten.div.Scalar %1572, %int20480 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1574 = torch.aten.add.Scalar %1571, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1575 = torch.aten.rsqrt %1574 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1576 = torch.aten.sub.Tensor %1563, %1573, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,20,1024],f16> | |
%1577 = torch.aten.mul.Tensor %1576, %1575 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,20,1024],f16> | |
%1578 = torch.aten.view %1577, %1331 : !torch.vtensor<[2,32,20,1024],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> | |
%1579 = torch.aten.unsqueeze %567, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1580 = torch.aten.unsqueeze %1579, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1581 = torch.aten.unsqueeze %1580, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1582 = torch.aten.unsqueeze %566, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1583 = torch.aten.unsqueeze %1582, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1584 = torch.aten.unsqueeze %1583, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1585 = torch.aten.mul.Tensor %1578, %1584 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1586 = torch.aten.add.Tensor %1585, %1581, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1587 = torch.aten.sigmoid %1586 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1588 = torch.aten.mul.Tensor %1587, %1586 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1589 = torch.aten.convolution %1588, %565, %564, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1590 = torch.aten.sigmoid %715 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%1591 = torch.aten.mul.Tensor %1590, %715 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%1592 = torch.aten.transpose.int %563, %int0, %int1 : !torch.vtensor<[640,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,640],f16> | |
%1593 = torch.aten.mm %1591, %1592 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,640],f16> -> !torch.vtensor<[2,640],f16> | |
%1594 = torch.aten.mul.Scalar %562, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> | |
%1595 = torch.aten.add.Tensor %1594, %1593, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640],f16> | |
%1596 = torch.aten.unsqueeze %1595, %int2 : !torch.vtensor<[2,640],f16>, !torch.int -> !torch.vtensor<[2,640,1],f16> | |
%1597 = torch.aten.unsqueeze %1596, %int3 : !torch.vtensor<[2,640,1],f16>, !torch.int -> !torch.vtensor<[2,640,1,1],f16> | |
%1598 = torch.aten.add.Tensor %1589, %1597, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1599 = torch.aten.view %1598, %1315 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> | |
%1600 = torch.aten.to.dtype %1599, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> | |
%1601 = torch.aten.sum.dim_IntList %1600, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1602 = torch.aten.div.Scalar %1601, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1603 = torch.aten.sub.Tensor %1600, %1602, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> | |
%1604 = torch.aten.mul.Tensor %1603, %1603 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> | |
%1605 = torch.aten.sum.dim_IntList %1604, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1606 = torch.aten.div.Scalar %1605, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1607 = torch.aten.to.dtype %1606, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1608 = torch.aten.sum.dim_IntList %1599, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1609 = torch.aten.div.Scalar %1608, %int20480 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1610 = torch.aten.add.Scalar %1607, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1611 = torch.aten.rsqrt %1610 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1612 = torch.aten.sub.Tensor %1599, %1609, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,20,1024],f16> | |
%1613 = torch.aten.mul.Tensor %1612, %1611 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,20,1024],f16> | |
%1614 = torch.aten.view %1613, %1331 : !torch.vtensor<[2,32,20,1024],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> | |
%1615 = torch.aten.unsqueeze %561, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1616 = torch.aten.unsqueeze %1615, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1617 = torch.aten.unsqueeze %1616, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1618 = torch.aten.unsqueeze %560, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1619 = torch.aten.unsqueeze %1618, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1620 = torch.aten.unsqueeze %1619, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1621 = torch.aten.mul.Tensor %1614, %1620 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1622 = torch.aten.add.Tensor %1621, %1617, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1623 = torch.aten.sigmoid %1622 : !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1624 = torch.aten.mul.Tensor %1623, %1622 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1625 = torch.aten.convolution %1624, %559, %558, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1626 = torch.aten.add.Tensor %1562, %1625, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1627 = torch.aten.div.Scalar %1626, %float1.000000e00 : !torch.vtensor<[2,640,32,32],f16>, !torch.float -> !torch.vtensor<[2,640,32,32],f16> | |
%1628 = torch.aten.view %1627, %1315 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,1024],f16> | |
%1629 = torch.aten.to.dtype %1628, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,1024],f64> | |
%1630 = torch.aten.sum.dim_IntList %1629, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1631 = torch.aten.div.Scalar %1630, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1632 = torch.aten.sub.Tensor %1629, %1631, %float1.000000e00 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,1024],f64> | |
%1633 = torch.aten.mul.Tensor %1632, %1632 : !torch.vtensor<[2,32,20,1024],f64>, !torch.vtensor<[2,32,20,1024],f64> -> !torch.vtensor<[2,32,20,1024],f64> | |
%1634 = torch.aten.sum.dim_IntList %1633, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1635 = torch.aten.div.Scalar %1634, %int20480 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1636 = torch.aten.to.dtype %1635, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1637 = torch.aten.sum.dim_IntList %1628, %721, %true, %none : !torch.vtensor<[2,32,20,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1638 = torch.aten.div.Scalar %1637, %int20480 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1639 = torch.aten.add.Scalar %1636, %float9.999990e-07, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1640 = torch.aten.rsqrt %1639 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1641 = torch.aten.sub.Tensor %1628, %1638, %int1 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,20,1024],f16> | |
%1642 = torch.aten.mul.Tensor %1641, %1640 : !torch.vtensor<[2,32,20,1024],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,20,1024],f16> | |
%1643 = torch.aten.view %1642, %1331 : !torch.vtensor<[2,32,20,1024],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> | |
%1644 = torch.aten.unsqueeze %557, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1645 = torch.aten.unsqueeze %1644, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1646 = torch.aten.unsqueeze %1645, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1647 = torch.aten.unsqueeze %556, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1648 = torch.aten.unsqueeze %1647, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1649 = torch.aten.unsqueeze %1648, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1650 = torch.aten.mul.Tensor %1643, %1649 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[2,640,32,32],f16> | |
%1651 = torch.aten.add.Tensor %1650, %1646, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1652 = torch.aten.permute %1651, %811 : !torch.vtensor<[2,640,32,32],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> | |
%1653 = torch.aten.view %1652, %1372 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1654 = torch.aten.transpose.int %555, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1655 = torch.aten.broadcast_to %1653, %1372 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1656 = torch.aten.view %1655, %1372 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1657 = torch.aten.broadcast_to %1654, %1377 : !torch.vtensor<[640,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,640],f16> | |
%1658 = torch.aten.view %1657, %1377 : !torch.vtensor<[2,640,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,640],f16> | |
%1659 = torch.aten.bmm %1656, %1658 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,640,640],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1660 = torch.aten.view %1659, %1372 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1661 = torch.aten.add.Tensor %1660, %554, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1662 = torch.aten.to.dtype %1661, %int7, %false, %false, %none : !torch.vtensor<[2,1024,640],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,640],f64> | |
%1663 = torch.aten.sum.dim_IntList %1662, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1664 = torch.aten.div.Scalar %1663, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1665 = torch.aten.sub.Tensor %1662, %1664, %float1.000000e00 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,1],f64>, !torch.float -> !torch.vtensor<[2,1024,640],f64> | |
%1666 = torch.aten.mul.Tensor %1665, %1665 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,640],f64> -> !torch.vtensor<[2,1024,640],f64> | |
%1667 = torch.aten.sum.dim_IntList %1666, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1668 = torch.aten.div.Scalar %1667, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1669 = torch.aten.to.dtype %1668, %int5, %false, %false, %none : !torch.vtensor<[2,1024,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1670 = torch.aten.sum.dim_IntList %1661, %688, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1671 = torch.aten.div.Scalar %1670, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1672 = torch.aten.add.Scalar %1669, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1673 = torch.aten.rsqrt %1672 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> | |
%1674 = torch.aten.sub.Tensor %1661, %1671, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1675 = torch.aten.mul.Tensor %1674, %1673 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1676 = torch.aten.mul.Tensor %1675, %553 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1677 = torch.aten.add.Tensor %1676, %552, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1678 = torch.aten.transpose.int %551, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1679 = torch.aten.view %1677, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1680 = torch.aten.mm %1679, %1678 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1681 = torch.aten.view %1680, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1682 = torch.aten.view %1681, %1404 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1683 = torch.aten.permute %1682, %847 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1684 = torch.aten.clone %1683, %int0 : !torch.vtensor<[2,10,1024,64],f16>, !torch.int -> !torch.vtensor<[2,10,1024,64],f16> | |
%1685 = torch.aten.view %1684, %1408 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[20,1024,64],f16> | |
%1686 = torch.aten.transpose.int %550, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1687 = torch.aten.view %1677, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1688 = torch.aten.mm %1687, %1686 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1689 = torch.aten.view %1688, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1690 = torch.aten.transpose.int %549, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1691 = torch.aten.view %1677, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1692 = torch.aten.mm %1691, %1690 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1693 = torch.aten.view %1692, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1694 = torch.aten.view %1689, %1404 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1695 = torch.aten.permute %1694, %847 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1696 = torch.aten.clone %1695, %int0 : !torch.vtensor<[2,10,1024,64],f16>, !torch.int -> !torch.vtensor<[2,10,1024,64],f16> | |
%1697 = torch.aten.view %1696, %1408 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[20,1024,64],f16> | |
%1698 = torch.aten.view %1693, %1404 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1699 = torch.aten.permute %1698, %847 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1700 = torch.aten.clone %1699, %int0 : !torch.vtensor<[2,10,1024,64],f16>, !torch.int -> !torch.vtensor<[2,10,1024,64],f16> | |
%1701 = torch.aten.view %1700, %1408 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[20,1024,64],f16> | |
%1702 = torch.aten.empty.memory_format %1426, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,1024],f32> | |
%1703 = torch.aten.transpose.int %1697, %int-1, %int-2 : !torch.vtensor<[20,1024,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[20,64,1024],f16> | |
%1704 = torch.aten.bmm %1685, %1703 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,64,1024],f16> -> !torch.vtensor<[20,1024,1024],f16> | |
%1705 = torch.aten.mul.Scalar %1704, %float1.250000e-01 : !torch.vtensor<[20,1024,1024],f16>, !torch.float -> !torch.vtensor<[20,1024,1024],f16> | |
%1706 = torch.aten.to.dtype %1702, %int5, %false, %false, %none : !torch.vtensor<[20,1024,1024],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,1024],f16> | |
%1707 = torch.aten.add.Tensor %1705, %1702, %int0 : !torch.vtensor<[20,1024,1024],f16>, !torch.vtensor<[20,1024,1024],f32>, !torch.int -> !torch.vtensor<[20,1024,1024],f16> | |
%values_10, %indices_11 = torch.aten.max.dim %1707, %int-1, %true : !torch.vtensor<[20,1024,1024],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,1024,1],f16>, !torch.vtensor<[20,1024,1],si64> | |
%1708 = torch.aten.sub.Tensor %1707, %values_10, %float1.000000e00 : !torch.vtensor<[20,1024,1024],f16>, !torch.vtensor<[20,1024,1],f16>, !torch.float -> !torch.vtensor<[20,1024,1024],f16> | |
%1709 = torch.aten.exp %1708 : !torch.vtensor<[20,1024,1024],f16> -> !torch.vtensor<[20,1024,1024],f16> | |
%1710 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%1711 = torch.aten.sum.dim_IntList %1709, %1710, %true, %none : !torch.vtensor<[20,1024,1024],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,1],f16> | |
%1712 = torch.aten.div.Tensor %1709, %1711 : !torch.vtensor<[20,1024,1024],f16>, !torch.vtensor<[20,1024,1],f16> -> !torch.vtensor<[20,1024,1024],f16> | |
%1713 = torch.aten.bmm %1712, %1701 : !torch.vtensor<[20,1024,1024],f16>, !torch.vtensor<[20,1024,64],f16> -> !torch.vtensor<[20,1024,64],f16> | |
%1714 = torch.aten.view %1713, %1439 : !torch.vtensor<[20,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1715 = torch.aten.permute %1714, %847 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1716 = torch.aten.clone %1715, %int0 : !torch.vtensor<[2,1024,10,64],f16>, !torch.int -> !torch.vtensor<[2,1024,10,64],f16> | |
%1717 = torch.aten.view %1716, %1372 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1718 = torch.aten.transpose.int %548, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1719 = torch.aten.view %1717, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1720 = torch.aten.mm %1719, %1718 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1721 = torch.aten.mul.Scalar %547, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> | |
%1722 = torch.aten.add.Tensor %1721, %1720, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> | |
%1723 = torch.aten.view %1722, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1724 = torch.aten.add.Tensor %1723, %1661, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1725 = torch.aten.to.dtype %1724, %int7, %false, %false, %none : !torch.vtensor<[2,1024,640],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,640],f64> | |
%1726 = torch.aten.sum.dim_IntList %1725, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1727 = torch.aten.div.Scalar %1726, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1728 = torch.aten.sub.Tensor %1725, %1727, %float1.000000e00 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,1],f64>, !torch.float -> !torch.vtensor<[2,1024,640],f64> | |
%1729 = torch.aten.mul.Tensor %1728, %1728 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,640],f64> -> !torch.vtensor<[2,1024,640],f64> | |
%1730 = torch.aten.sum.dim_IntList %1729, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1731 = torch.aten.div.Scalar %1730, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1732 = torch.aten.to.dtype %1731, %int5, %false, %false, %none : !torch.vtensor<[2,1024,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1733 = torch.aten.sum.dim_IntList %1724, %688, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1734 = torch.aten.div.Scalar %1733, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1735 = torch.aten.add.Scalar %1732, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1736 = torch.aten.rsqrt %1735 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> | |
%1737 = torch.aten.sub.Tensor %1724, %1734, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1738 = torch.aten.mul.Tensor %1737, %1736 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1739 = torch.aten.mul.Tensor %1738, %546 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1740 = torch.aten.add.Tensor %1739, %545, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1741 = torch.aten.transpose.int %544, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1742 = torch.aten.view %1740, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1743 = torch.aten.mm %1742, %1741 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1744 = torch.aten.view %1743, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1745 = torch.aten.view %1744, %1404 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1746 = torch.aten.permute %1745, %847 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1747 = torch.aten.clone %1746, %int0 : !torch.vtensor<[2,10,1024,64],f16>, !torch.int -> !torch.vtensor<[2,10,1024,64],f16> | |
%1748 = torch.aten.view %1747, %1408 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[20,1024,64],f16> | |
%1749 = torch.aten.transpose.int %543, %int0, %int1 : !torch.vtensor<[640,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,640],f16> | |
%1750 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%1751 = torch.aten.mm %1750, %1749 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[128,640],f16> | |
%1752 = torch.aten.view %1751, %1478 : !torch.vtensor<[128,640],f16>, !torch.list<int> -> !torch.vtensor<[2,64,640],f16> | |
%1753 = torch.aten.transpose.int %542, %int0, %int1 : !torch.vtensor<[640,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,640],f16> | |
%1754 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%1755 = torch.aten.mm %1754, %1753 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,640],f16> -> !torch.vtensor<[128,640],f16> | |
%1756 = torch.aten.view %1755, %1478 : !torch.vtensor<[128,640],f16>, !torch.list<int> -> !torch.vtensor<[2,64,640],f16> | |
%1757 = torch.aten.view %1752, %1484 : !torch.vtensor<[2,64,640],f16>, !torch.list<int> -> !torch.vtensor<[2,64,10,64],f16> | |
%1758 = torch.aten.permute %1757, %847 : !torch.vtensor<[2,64,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,64,64],f16> | |
%1759 = torch.aten.clone %1758, %int0 : !torch.vtensor<[2,10,64,64],f16>, !torch.int -> !torch.vtensor<[2,10,64,64],f16> | |
%1760 = torch.aten.view %1759, %1488 : !torch.vtensor<[2,10,64,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,64],f16> | |
%1761 = torch.aten.view %1756, %1484 : !torch.vtensor<[2,64,640],f16>, !torch.list<int> -> !torch.vtensor<[2,64,10,64],f16> | |
%1762 = torch.aten.permute %1761, %847 : !torch.vtensor<[2,64,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,64,64],f16> | |
%1763 = torch.aten.clone %1762, %int0 : !torch.vtensor<[2,10,64,64],f16>, !torch.int -> !torch.vtensor<[2,10,64,64],f16> | |
%1764 = torch.aten.view %1763, %1488 : !torch.vtensor<[2,10,64,64],f16>, !torch.list<int> -> !torch.vtensor<[20,64,64],f16> | |
%1765 = torch.aten.empty.memory_format %1408, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,64],f32> | |
%1766 = torch.aten.transpose.int %1760, %int-1, %int-2 : !torch.vtensor<[20,64,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[20,64,64],f16> | |
%1767 = torch.aten.bmm %1748, %1766 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,64,64],f16> -> !torch.vtensor<[20,1024,64],f16> | |
%1768 = torch.aten.mul.Scalar %1767, %float1.250000e-01 : !torch.vtensor<[20,1024,64],f16>, !torch.float -> !torch.vtensor<[20,1024,64],f16> | |
%1769 = torch.aten.to.dtype %1765, %int5, %false, %false, %none : !torch.vtensor<[20,1024,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,64],f16> | |
%1770 = torch.aten.add.Tensor %1768, %1765, %int0 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,1024,64],f32>, !torch.int -> !torch.vtensor<[20,1024,64],f16> | |
%values_12, %indices_13 = torch.aten.max.dim %1770, %int-1, %true : !torch.vtensor<[20,1024,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[20,1024,1],f16>, !torch.vtensor<[20,1024,1],si64> | |
%1771 = torch.aten.sub.Tensor %1770, %values_12, %float1.000000e00 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,1024,1],f16>, !torch.float -> !torch.vtensor<[20,1024,64],f16> | |
%1772 = torch.aten.exp %1771 : !torch.vtensor<[20,1024,64],f16> -> !torch.vtensor<[20,1024,64],f16> | |
%1773 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%1774 = torch.aten.sum.dim_IntList %1772, %1773, %true, %none : !torch.vtensor<[20,1024,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[20,1024,1],f16> | |
%1775 = torch.aten.div.Tensor %1772, %1774 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,1024,1],f16> -> !torch.vtensor<[20,1024,64],f16> | |
%1776 = torch.aten.bmm %1775, %1764 : !torch.vtensor<[20,1024,64],f16>, !torch.vtensor<[20,64,64],f16> -> !torch.vtensor<[20,1024,64],f16> | |
%1777 = torch.aten.view %1776, %1439 : !torch.vtensor<[20,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[2,10,1024,64],f16> | |
%1778 = torch.aten.permute %1777, %847 : !torch.vtensor<[2,10,1024,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,10,64],f16> | |
%1779 = torch.aten.clone %1778, %int0 : !torch.vtensor<[2,1024,10,64],f16>, !torch.int -> !torch.vtensor<[2,1024,10,64],f16> | |
%1780 = torch.aten.view %1779, %1372 : !torch.vtensor<[2,1024,10,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1781 = torch.aten.transpose.int %541, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1782 = torch.aten.view %1780, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1783 = torch.aten.mm %1782, %1781 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1784 = torch.aten.mul.Scalar %540, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> | |
%1785 = torch.aten.add.Tensor %1784, %1783, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> | |
%1786 = torch.aten.view %1785, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1787 = torch.aten.add.Tensor %1786, %1724, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1788 = torch.aten.to.dtype %1787, %int7, %false, %false, %none : !torch.vtensor<[2,1024,640],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,640],f64> | |
%1789 = torch.aten.sum.dim_IntList %1788, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1790 = torch.aten.div.Scalar %1789, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1791 = torch.aten.sub.Tensor %1788, %1790, %float1.000000e00 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,1],f64>, !torch.float -> !torch.vtensor<[2,1024,640],f64> | |
%1792 = torch.aten.mul.Tensor %1791, %1791 : !torch.vtensor<[2,1024,640],f64>, !torch.vtensor<[2,1024,640],f64> -> !torch.vtensor<[2,1024,640],f64> | |
%1793 = torch.aten.sum.dim_IntList %1792, %688, %true, %none : !torch.vtensor<[2,1024,640],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f64> | |
%1794 = torch.aten.div.Scalar %1793, %int640 : !torch.vtensor<[2,1024,1],f64>, !torch.int -> !torch.vtensor<[2,1024,1],f64> | |
%1795 = torch.aten.to.dtype %1794, %int5, %false, %false, %none : !torch.vtensor<[2,1024,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1796 = torch.aten.sum.dim_IntList %1787, %688, %true, %none : !torch.vtensor<[2,1024,640],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,1024,1],f16> | |
%1797 = torch.aten.div.Scalar %1796, %int640 : !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1798 = torch.aten.add.Scalar %1795, %float1.000000e-05, %int1 : !torch.vtensor<[2,1024,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,1024,1],f16> | |
%1799 = torch.aten.rsqrt %1798 : !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,1],f16> | |
%1800 = torch.aten.sub.Tensor %1787, %1797, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1801 = torch.aten.mul.Tensor %1800, %1799 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,1],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1802 = torch.aten.mul.Tensor %1801, %539 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16> -> !torch.vtensor<[2,1024,640],f16> | |
%1803 = torch.aten.add.Tensor %1802, %538, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1804 = torch.aten.transpose.int %537, %int0, %int1 : !torch.vtensor<[5120,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,5120],f16> | |
%1805 = torch.aten.view %1803, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1806 = torch.aten.mm %1805, %1804 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,5120],f16> -> !torch.vtensor<[2048,5120],f16> | |
%1807 = torch.aten.mul.Scalar %536, %int1 : !torch.vtensor<[5120],f16>, !torch.int -> !torch.vtensor<[5120],f16> | |
%1808 = torch.aten.add.Tensor %1807, %1806, %int1 : !torch.vtensor<[5120],f16>, !torch.vtensor<[2048,5120],f16>, !torch.int -> !torch.vtensor<[2048,5120],f16> | |
%1809 = torch.aten.view %1808, %1538 : !torch.vtensor<[2048,5120],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,5120],f16> | |
%1810 = torch.aten.slice.Tensor %1809, %int-1, %int0, %int2560, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> | |
%1811 = torch.aten.slice.Tensor %1809, %int-1, %int2560, %int5120, %int1 : !torch.vtensor<[2,1024,5120],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,1024,2560],f16> | |
%1812 = torch.aten.gelu %1811, %str : !torch.vtensor<[2,1024,2560],f16>, !torch.str -> !torch.vtensor<[2,1024,2560],f16> | |
%1813 = torch.aten.mul.Tensor %1810, %1812 : !torch.vtensor<[2,1024,2560],f16>, !torch.vtensor<[2,1024,2560],f16> -> !torch.vtensor<[2,1024,2560],f16> | |
%1814 = torch.aten.transpose.int %535, %int0, %int1 : !torch.vtensor<[640,2560],f16>, !torch.int, !torch.int -> !torch.vtensor<[2560,640],f16> | |
%1815 = torch.aten.view %1813, %1545 : !torch.vtensor<[2,1024,2560],f16>, !torch.list<int> -> !torch.vtensor<[2048,2560],f16> | |
%1816 = torch.aten.mm %1815, %1814 : !torch.vtensor<[2048,2560],f16>, !torch.vtensor<[2560,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1817 = torch.aten.mul.Scalar %534, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> | |
%1818 = torch.aten.add.Tensor %1817, %1816, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> | |
%1819 = torch.aten.view %1818, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1820 = torch.aten.add.Tensor %1819, %1787, %int1 : !torch.vtensor<[2,1024,640],f16>, !torch.vtensor<[2,1024,640],f16>, !torch.int -> !torch.vtensor<[2,1024,640],f16> | |
%1821 = torch.aten.transpose.int %533, %int0, %int1 : !torch.vtensor<[640,640],f16>, !torch.int, !torch.int -> !torch.vtensor<[640,640],f16> | |
%1822 = torch.aten.view %1820, %1400 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2048,640],f16> | |
%1823 = torch.aten.mm %1822, %1821 : !torch.vtensor<[2048,640],f16>, !torch.vtensor<[640,640],f16> -> !torch.vtensor<[2048,640],f16> | |
%1824 = torch.aten.mul.Scalar %532, %int1 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[640],f16> | |
%1825 = torch.aten.add.Tensor %1824, %1823, %int1 : !torch.vtensor<[640],f16>, !torch.vtensor<[2048,640],f16>, !torch.int -> !torch.vtensor<[2048,640],f16> | |
%1826 = torch.aten.view %1825, %1372 : !torch.vtensor<[2048,640],f16>, !torch.list<int> -> !torch.vtensor<[2,1024,640],f16> | |
%1827 = torch.aten.view %1826, %1558 : !torch.vtensor<[2,1024,640],f16>, !torch.list<int> -> !torch.vtensor<[2,32,32,640],f16> | |
%1828 = torch.aten.permute %1827, %1003 : !torch.vtensor<[2,32,32,640],f16>, !torch.list<int> -> !torch.vtensor<[2,640,32,32],f16> | |
%1829 = torch.aten.clone %1828, %int0 : !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1830 = torch.aten.add.Tensor %1829, %1627, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[2,640,32,32],f16>, !torch.int -> !torch.vtensor<[2,640,32,32],f16> | |
%1831 = torch.aten.convolution %1830, %531, %530, %1275, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,640,32,32],f16>, !torch.vtensor<[640,640,3,3],f16>, !torch.vtensor<[640],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> | |
%1832 = torch.prim.ListConstruct %int2, %int32, %int20, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1833 = torch.aten.view %1831, %1832 : !torch.vtensor<[2,640,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,20,256],f16> | |
%1834 = torch.aten.to.dtype %1833, %int7, %false, %false, %none : !torch.vtensor<[2,32,20,256],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,20,256],f64> | |
%1835 = torch.aten.sum.dim_IntList %1834, %721, %true, %none : !torch.vtensor<[2,32,20,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1836 = torch.aten.div.Scalar %1835, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1837 = torch.aten.sub.Tensor %1834, %1836, %float1.000000e00 : !torch.vtensor<[2,32,20,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,20,256],f64> | |
%1838 = torch.aten.mul.Tensor %1837, %1837 : !torch.vtensor<[2,32,20,256],f64>, !torch.vtensor<[2,32,20,256],f64> -> !torch.vtensor<[2,32,20,256],f64> | |
%1839 = torch.aten.sum.dim_IntList %1838, %721, %true, %none : !torch.vtensor<[2,32,20,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1840 = torch.aten.div.Scalar %1839, %int5120 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1841 = torch.aten.to.dtype %1840, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1842 = torch.aten.sum.dim_IntList %1833, %721, %true, %none : !torch.vtensor<[2,32,20,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1843 = torch.aten.div.Scalar %1842, %int5120 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1844 = torch.aten.add.Scalar %1841, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1845 = torch.aten.rsqrt %1844 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1846 = torch.aten.sub.Tensor %1833, %1843, %int1 : !torch.vtensor<[2,32,20,256],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,20,256],f16> | |
%1847 = torch.aten.mul.Tensor %1846, %1845 : !torch.vtensor<[2,32,20,256],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,20,256],f16> | |
%1848 = torch.prim.ListConstruct %int2, %int640, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1849 = torch.aten.view %1847, %1848 : !torch.vtensor<[2,32,20,256],f16>, !torch.list<int> -> !torch.vtensor<[2,640,16,16],f16> | |
%1850 = torch.aten.unsqueeze %529, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1851 = torch.aten.unsqueeze %1850, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1852 = torch.aten.unsqueeze %1851, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1853 = torch.aten.unsqueeze %528, %int0 : !torch.vtensor<[640],f16>, !torch.int -> !torch.vtensor<[1,640],f16> | |
%1854 = torch.aten.unsqueeze %1853, %int2 : !torch.vtensor<[1,640],f16>, !torch.int -> !torch.vtensor<[1,640,1],f16> | |
%1855 = torch.aten.unsqueeze %1854, %int3 : !torch.vtensor<[1,640,1],f16>, !torch.int -> !torch.vtensor<[1,640,1,1],f16> | |
%1856 = torch.aten.mul.Tensor %1849, %1855 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1,640,1,1],f16> -> !torch.vtensor<[2,640,16,16],f16> | |
%1857 = torch.aten.add.Tensor %1856, %1852, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1,640,1,1],f16>, !torch.int -> !torch.vtensor<[2,640,16,16],f16> | |
%1858 = torch.aten.sigmoid %1857 : !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> | |
%1859 = torch.aten.mul.Tensor %1858, %1857 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[2,640,16,16],f16> -> !torch.vtensor<[2,640,16,16],f16> | |
%1860 = torch.aten.convolution %1859, %527, %526, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%1861 = torch.aten.sigmoid %715 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%1862 = torch.aten.mul.Tensor %1861, %715 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%1863 = torch.aten.transpose.int %525, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%1864 = torch.aten.mm %1862, %1863 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%1865 = torch.aten.mul.Scalar %524, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%1866 = torch.aten.add.Tensor %1865, %1864, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> | |
%1867 = torch.aten.unsqueeze %1866, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16> | |
%1868 = torch.aten.unsqueeze %1867, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16> | |
%1869 = torch.aten.add.Tensor %1860, %1868, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%1870 = torch.prim.ListConstruct %int2, %int32, %int40, %int256 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1871 = torch.aten.view %1869, %1870 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> | |
%1872 = torch.aten.to.dtype %1871, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> | |
%1873 = torch.aten.sum.dim_IntList %1872, %721, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1874 = torch.aten.div.Scalar %1873, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1875 = torch.aten.sub.Tensor %1872, %1874, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> | |
%1876 = torch.aten.mul.Tensor %1875, %1875 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> | |
%1877 = torch.aten.sum.dim_IntList %1876, %721, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1878 = torch.aten.div.Scalar %1877, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1879 = torch.aten.to.dtype %1878, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1880 = torch.aten.sum.dim_IntList %1871, %721, %true, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1881 = torch.aten.div.Scalar %1880, %int10240 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1882 = torch.aten.add.Scalar %1879, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1883 = torch.aten.rsqrt %1882 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1884 = torch.aten.sub.Tensor %1871, %1881, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,256],f16> | |
%1885 = torch.aten.mul.Tensor %1884, %1883 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,256],f16> | |
%1886 = torch.prim.ListConstruct %int2, %int1280, %int16, %int16 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1887 = torch.aten.view %1885, %1886 : !torch.vtensor<[2,32,40,256],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> | |
%1888 = torch.aten.unsqueeze %523, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%1889 = torch.aten.unsqueeze %1888, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%1890 = torch.aten.unsqueeze %1889, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%1891 = torch.aten.unsqueeze %522, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%1892 = torch.aten.unsqueeze %1891, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%1893 = torch.aten.unsqueeze %1892, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%1894 = torch.aten.mul.Tensor %1887, %1893 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%1895 = torch.aten.add.Tensor %1894, %1890, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%1896 = torch.aten.sigmoid %1895 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%1897 = torch.aten.mul.Tensor %1896, %1895 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%1898 = torch.aten.convolution %1897, %521, %520, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%1899 = torch.aten.convolution %1831, %519, %518, %716, %717, %716, %false, %717, %int1 : !torch.vtensor<[2,640,16,16],f16>, !torch.vtensor<[1280,640,1,1],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%1900 = torch.aten.add.Tensor %1899, %1898, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%1901 = torch.aten.div.Scalar %1900, %float1.000000e00 : !torch.vtensor<[2,1280,16,16],f16>, !torch.float -> !torch.vtensor<[2,1280,16,16],f16> | |
%1902 = torch.aten.view %1901, %1870 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> | |
%1903 = torch.aten.to.dtype %1902, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> | |
%1904 = torch.aten.sum.dim_IntList %1903, %721, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1905 = torch.aten.div.Scalar %1904, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1906 = torch.aten.sub.Tensor %1903, %1905, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> | |
%1907 = torch.aten.mul.Tensor %1906, %1906 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> | |
%1908 = torch.aten.sum.dim_IntList %1907, %721, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%1909 = torch.aten.div.Scalar %1908, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%1910 = torch.aten.to.dtype %1909, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1911 = torch.aten.sum.dim_IntList %1902, %721, %true, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%1912 = torch.aten.div.Scalar %1911, %int10240 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1913 = torch.aten.add.Scalar %1910, %float9.999990e-07, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%1914 = torch.aten.rsqrt %1913 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%1915 = torch.aten.sub.Tensor %1902, %1912, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,256],f16> | |
%1916 = torch.aten.mul.Tensor %1915, %1914 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,256],f16> | |
%1917 = torch.aten.view %1916, %1886 : !torch.vtensor<[2,32,40,256],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> | |
%1918 = torch.aten.unsqueeze %517, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%1919 = torch.aten.unsqueeze %1918, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%1920 = torch.aten.unsqueeze %1919, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%1921 = torch.aten.unsqueeze %516, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%1922 = torch.aten.unsqueeze %1921, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%1923 = torch.aten.unsqueeze %1922, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%1924 = torch.aten.mul.Tensor %1917, %1923 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%1925 = torch.aten.add.Tensor %1924, %1920, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%1926 = torch.aten.permute %1925, %811 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> | |
%1927 = torch.prim.ListConstruct %int2, %int256, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1928 = torch.aten.view %1926, %1927 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%1929 = torch.aten.transpose.int %515, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%1930 = torch.aten.broadcast_to %1928, %1927 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%1931 = torch.aten.view %1930, %1927 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%1932 = torch.prim.ListConstruct %int2, %int1280, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1933 = torch.aten.broadcast_to %1929, %1932 : !torch.vtensor<[1280,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,1280],f16> | |
%1934 = torch.aten.view %1933, %1932 : !torch.vtensor<[2,1280,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,1280],f16> | |
%1935 = torch.aten.bmm %1931, %1934 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,1280,1280],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%1936 = torch.aten.view %1935, %1927 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%1937 = torch.aten.add.Tensor %1936, %514, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%1938 = torch.aten.to.dtype %1937, %int7, %false, %false, %none : !torch.vtensor<[2,256,1280],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1280],f64> | |
%1939 = torch.aten.sum.dim_IntList %1938, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%1940 = torch.aten.div.Scalar %1939, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%1941 = torch.aten.sub.Tensor %1938, %1940, %float1.000000e00 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1],f64>, !torch.float -> !torch.vtensor<[2,256,1280],f64> | |
%1942 = torch.aten.mul.Tensor %1941, %1941 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1280],f64> -> !torch.vtensor<[2,256,1280],f64> | |
%1943 = torch.aten.sum.dim_IntList %1942, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%1944 = torch.aten.div.Scalar %1943, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%1945 = torch.aten.to.dtype %1944, %int5, %false, %false, %none : !torch.vtensor<[2,256,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%1946 = torch.aten.sum.dim_IntList %1937, %688, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%1947 = torch.aten.div.Scalar %1946, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%1948 = torch.aten.add.Scalar %1945, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%1949 = torch.aten.rsqrt %1948 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> | |
%1950 = torch.aten.sub.Tensor %1937, %1947, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%1951 = torch.aten.mul.Tensor %1950, %1949 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%1952 = torch.aten.mul.Tensor %1951, %513 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%1953 = torch.aten.add.Tensor %1952, %512, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%1954 = torch.aten.transpose.int %511, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%1955 = torch.prim.ListConstruct %int512, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> | |
%1956 = torch.aten.view %1953, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%1957 = torch.aten.mm %1956, %1954 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%1958 = torch.aten.view %1957, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%1959 = torch.prim.ListConstruct %int2, %int256, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1960 = torch.aten.view %1958, %1959 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%1961 = torch.aten.permute %1960, %847 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%1962 = torch.aten.clone %1961, %int0 : !torch.vtensor<[2,20,256,64],f16>, !torch.int -> !torch.vtensor<[2,20,256,64],f16> | |
%1963 = torch.prim.ListConstruct %int40, %int256, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1964 = torch.aten.view %1962, %1963 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[40,256,64],f16> | |
%1965 = torch.aten.transpose.int %510, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%1966 = torch.aten.view %1953, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%1967 = torch.aten.mm %1966, %1965 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%1968 = torch.aten.view %1967, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%1969 = torch.aten.transpose.int %509, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%1970 = torch.aten.view %1953, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%1971 = torch.aten.mm %1970, %1969 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%1972 = torch.aten.view %1971, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%1973 = torch.aten.view %1968, %1959 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%1974 = torch.aten.permute %1973, %847 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%1975 = torch.aten.clone %1974, %int0 : !torch.vtensor<[2,20,256,64],f16>, !torch.int -> !torch.vtensor<[2,20,256,64],f16> | |
%1976 = torch.aten.view %1975, %1963 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[40,256,64],f16> | |
%1977 = torch.aten.view %1972, %1959 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%1978 = torch.aten.permute %1977, %847 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%1979 = torch.aten.clone %1978, %int0 : !torch.vtensor<[2,20,256,64],f16>, !torch.int -> !torch.vtensor<[2,20,256,64],f16> | |
%1980 = torch.aten.view %1979, %1963 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[40,256,64],f16> | |
%1981 = torch.prim.ListConstruct %int40, %int256, %int256 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1982 = torch.aten.empty.memory_format %1981, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[40,256,256],f32> | |
%1983 = torch.aten.transpose.int %1976, %int-1, %int-2 : !torch.vtensor<[40,256,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[40,64,256],f16> | |
%1984 = torch.aten.bmm %1964, %1983 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,64,256],f16> -> !torch.vtensor<[40,256,256],f16> | |
%1985 = torch.aten.mul.Scalar %1984, %float1.250000e-01 : !torch.vtensor<[40,256,256],f16>, !torch.float -> !torch.vtensor<[40,256,256],f16> | |
%1986 = torch.aten.to.dtype %1982, %int5, %false, %false, %none : !torch.vtensor<[40,256,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,256,256],f16> | |
%1987 = torch.aten.add.Tensor %1985, %1982, %int0 : !torch.vtensor<[40,256,256],f16>, !torch.vtensor<[40,256,256],f32>, !torch.int -> !torch.vtensor<[40,256,256],f16> | |
%values_14, %indices_15 = torch.aten.max.dim %1987, %int-1, %true : !torch.vtensor<[40,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,256,1],f16>, !torch.vtensor<[40,256,1],si64> | |
%1988 = torch.aten.sub.Tensor %1987, %values_14, %float1.000000e00 : !torch.vtensor<[40,256,256],f16>, !torch.vtensor<[40,256,1],f16>, !torch.float -> !torch.vtensor<[40,256,256],f16> | |
%1989 = torch.aten.exp %1988 : !torch.vtensor<[40,256,256],f16> -> !torch.vtensor<[40,256,256],f16> | |
%1990 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%1991 = torch.aten.sum.dim_IntList %1989, %1990, %true, %none : !torch.vtensor<[40,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[40,256,1],f16> | |
%1992 = torch.aten.div.Tensor %1989, %1991 : !torch.vtensor<[40,256,256],f16>, !torch.vtensor<[40,256,1],f16> -> !torch.vtensor<[40,256,256],f16> | |
%1993 = torch.aten.bmm %1992, %1980 : !torch.vtensor<[40,256,256],f16>, !torch.vtensor<[40,256,64],f16> -> !torch.vtensor<[40,256,64],f16> | |
%1994 = torch.prim.ListConstruct %int2, %int20, %int256, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%1995 = torch.aten.view %1993, %1994 : !torch.vtensor<[40,256,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%1996 = torch.aten.permute %1995, %847 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%1997 = torch.aten.clone %1996, %int0 : !torch.vtensor<[2,256,20,64],f16>, !torch.int -> !torch.vtensor<[2,256,20,64],f16> | |
%1998 = torch.aten.view %1997, %1927 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%1999 = torch.aten.transpose.int %508, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2000 = torch.aten.view %1998, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2001 = torch.aten.mm %2000, %1999 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2002 = torch.aten.mul.Scalar %507, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2003 = torch.aten.add.Tensor %2002, %2001, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> | |
%2004 = torch.aten.view %2003, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2005 = torch.aten.add.Tensor %2004, %1937, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2006 = torch.aten.to.dtype %2005, %int7, %false, %false, %none : !torch.vtensor<[2,256,1280],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1280],f64> | |
%2007 = torch.aten.sum.dim_IntList %2006, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%2008 = torch.aten.div.Scalar %2007, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%2009 = torch.aten.sub.Tensor %2006, %2008, %float1.000000e00 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1],f64>, !torch.float -> !torch.vtensor<[2,256,1280],f64> | |
%2010 = torch.aten.mul.Tensor %2009, %2009 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1280],f64> -> !torch.vtensor<[2,256,1280],f64> | |
%2011 = torch.aten.sum.dim_IntList %2010, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%2012 = torch.aten.div.Scalar %2011, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%2013 = torch.aten.to.dtype %2012, %int5, %false, %false, %none : !torch.vtensor<[2,256,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%2014 = torch.aten.sum.dim_IntList %2005, %688, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%2015 = torch.aten.div.Scalar %2014, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%2016 = torch.aten.add.Scalar %2013, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%2017 = torch.aten.rsqrt %2016 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> | |
%2018 = torch.aten.sub.Tensor %2005, %2015, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2019 = torch.aten.mul.Tensor %2018, %2017 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2020 = torch.aten.mul.Tensor %2019, %506 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2021 = torch.aten.add.Tensor %2020, %505, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2022 = torch.aten.transpose.int %504, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2023 = torch.aten.view %2021, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2024 = torch.aten.mm %2023, %2022 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2025 = torch.aten.view %2024, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2026 = torch.aten.view %2025, %1959 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%2027 = torch.aten.permute %2026, %847 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%2028 = torch.aten.clone %2027, %int0 : !torch.vtensor<[2,20,256,64],f16>, !torch.int -> !torch.vtensor<[2,20,256,64],f16> | |
%2029 = torch.aten.view %2028, %1963 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[40,256,64],f16> | |
%2030 = torch.aten.transpose.int %503, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16> | |
%2031 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%2032 = torch.aten.mm %2031, %2030 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2033 = torch.prim.ListConstruct %int2, %int64, %int1280 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2034 = torch.aten.view %2032, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2035 = torch.aten.transpose.int %502, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16> | |
%2036 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%2037 = torch.aten.mm %2036, %2035 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2038 = torch.aten.view %2037, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2039 = torch.prim.ListConstruct %int2, %int64, %int20, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2040 = torch.aten.view %2034, %2039 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2041 = torch.aten.permute %2040, %847 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2042 = torch.aten.clone %2041, %int0 : !torch.vtensor<[2,20,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,64,64],f16> | |
%2043 = torch.prim.ListConstruct %int40, %int64, %int64 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2044 = torch.aten.view %2042, %2043 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,64],f16> | |
%2045 = torch.aten.view %2038, %2039 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2046 = torch.aten.permute %2045, %847 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2047 = torch.aten.clone %2046, %int0 : !torch.vtensor<[2,20,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,64,64],f16> | |
%2048 = torch.aten.view %2047, %2043 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,64],f16> | |
%2049 = torch.aten.empty.memory_format %1963, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[40,256,64],f32> | |
%2050 = torch.aten.transpose.int %2044, %int-1, %int-2 : !torch.vtensor<[40,64,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[40,64,64],f16> | |
%2051 = torch.aten.bmm %2029, %2050 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,64,64],f16> -> !torch.vtensor<[40,256,64],f16> | |
%2052 = torch.aten.mul.Scalar %2051, %float1.250000e-01 : !torch.vtensor<[40,256,64],f16>, !torch.float -> !torch.vtensor<[40,256,64],f16> | |
%2053 = torch.aten.to.dtype %2049, %int5, %false, %false, %none : !torch.vtensor<[40,256,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,256,64],f16> | |
%2054 = torch.aten.add.Tensor %2052, %2049, %int0 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,256,64],f32>, !torch.int -> !torch.vtensor<[40,256,64],f16> | |
%values_16, %indices_17 = torch.aten.max.dim %2054, %int-1, %true : !torch.vtensor<[40,256,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,256,1],f16>, !torch.vtensor<[40,256,1],si64> | |
%2055 = torch.aten.sub.Tensor %2054, %values_16, %float1.000000e00 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,256,1],f16>, !torch.float -> !torch.vtensor<[40,256,64],f16> | |
%2056 = torch.aten.exp %2055 : !torch.vtensor<[40,256,64],f16> -> !torch.vtensor<[40,256,64],f16> | |
%2057 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%2058 = torch.aten.sum.dim_IntList %2056, %2057, %true, %none : !torch.vtensor<[40,256,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[40,256,1],f16> | |
%2059 = torch.aten.div.Tensor %2056, %2058 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,256,1],f16> -> !torch.vtensor<[40,256,64],f16> | |
%2060 = torch.aten.bmm %2059, %2048 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,64,64],f16> -> !torch.vtensor<[40,256,64],f16> | |
%2061 = torch.aten.view %2060, %1994 : !torch.vtensor<[40,256,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%2062 = torch.aten.permute %2061, %847 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%2063 = torch.aten.clone %2062, %int0 : !torch.vtensor<[2,256,20,64],f16>, !torch.int -> !torch.vtensor<[2,256,20,64],f16> | |
%2064 = torch.aten.view %2063, %1927 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2065 = torch.aten.transpose.int %501, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2066 = torch.aten.view %2064, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2067 = torch.aten.mm %2066, %2065 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2068 = torch.aten.mul.Scalar %500, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2069 = torch.aten.add.Tensor %2068, %2067, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> | |
%2070 = torch.aten.view %2069, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2071 = torch.aten.add.Tensor %2070, %2005, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2072 = torch.aten.to.dtype %2071, %int7, %false, %false, %none : !torch.vtensor<[2,256,1280],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1280],f64> | |
%2073 = torch.aten.sum.dim_IntList %2072, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%2074 = torch.aten.div.Scalar %2073, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%2075 = torch.aten.sub.Tensor %2072, %2074, %float1.000000e00 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1],f64>, !torch.float -> !torch.vtensor<[2,256,1280],f64> | |
%2076 = torch.aten.mul.Tensor %2075, %2075 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1280],f64> -> !torch.vtensor<[2,256,1280],f64> | |
%2077 = torch.aten.sum.dim_IntList %2076, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%2078 = torch.aten.div.Scalar %2077, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%2079 = torch.aten.to.dtype %2078, %int5, %false, %false, %none : !torch.vtensor<[2,256,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%2080 = torch.aten.sum.dim_IntList %2071, %688, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%2081 = torch.aten.div.Scalar %2080, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%2082 = torch.aten.add.Scalar %2079, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%2083 = torch.aten.rsqrt %2082 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> | |
%2084 = torch.aten.sub.Tensor %2071, %2081, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2085 = torch.aten.mul.Tensor %2084, %2083 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2086 = torch.aten.mul.Tensor %2085, %499 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2087 = torch.aten.add.Tensor %2086, %498, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2088 = torch.aten.transpose.int %497, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16> | |
%2089 = torch.aten.view %2087, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2090 = torch.aten.mm %2089, %2088 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16> | |
%2091 = torch.aten.mul.Scalar %496, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16> | |
%2092 = torch.aten.add.Tensor %2091, %2090, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16> | |
%2093 = torch.prim.ListConstruct %int2, %int256, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2094 = torch.aten.view %2092, %2093 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16> | |
%2095 = torch.aten.slice.Tensor %2094, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> | |
%2096 = torch.aten.slice.Tensor %2094, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> | |
%2097 = torch.aten.gelu %2096, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16> | |
%2098 = torch.aten.mul.Tensor %2095, %2097 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16> | |
%2099 = torch.aten.transpose.int %495, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16> | |
%2100 = torch.prim.ListConstruct %int512, %int5120 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2101 = torch.aten.view %2098, %2100 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16> | |
%2102 = torch.aten.mm %2101, %2099 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2103 = torch.aten.mul.Scalar %494, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2104 = torch.aten.add.Tensor %2103, %2102, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> | |
%2105 = torch.aten.view %2104, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2106 = torch.aten.add.Tensor %2105, %2071, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2107 = torch.aten.transpose.int %493, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2108 = torch.aten.view %2106, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2109 = torch.aten.mm %2108, %2107 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2110 = torch.aten.mul.Scalar %492, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2111 = torch.aten.add.Tensor %2110, %2109, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> | |
%2112 = torch.aten.view %2111, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2113 = torch.prim.ListConstruct %int2, %int16, %int16, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2114 = torch.aten.view %2112, %2113 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> | |
%2115 = torch.aten.permute %2114, %1003 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2116 = torch.aten.clone %2115, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2117 = torch.aten.add.Tensor %2116, %1901, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2118 = torch.aten.view %2117, %1870 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> | |
%2119 = torch.aten.to.dtype %2118, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> | |
%2120 = torch.aten.sum.dim_IntList %2119, %721, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2121 = torch.aten.div.Scalar %2120, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2122 = torch.aten.sub.Tensor %2119, %2121, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> | |
%2123 = torch.aten.mul.Tensor %2122, %2122 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> | |
%2124 = torch.aten.sum.dim_IntList %2123, %721, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2125 = torch.aten.div.Scalar %2124, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2126 = torch.aten.to.dtype %2125, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2127 = torch.aten.sum.dim_IntList %2118, %721, %true, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2128 = torch.aten.div.Scalar %2127, %int10240 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2129 = torch.aten.add.Scalar %2126, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2130 = torch.aten.rsqrt %2129 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%2131 = torch.aten.sub.Tensor %2118, %2128, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,256],f16> | |
%2132 = torch.aten.mul.Tensor %2131, %2130 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,256],f16> | |
%2133 = torch.aten.view %2132, %1886 : !torch.vtensor<[2,32,40,256],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2134 = torch.aten.unsqueeze %491, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2135 = torch.aten.unsqueeze %2134, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2136 = torch.aten.unsqueeze %2135, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2137 = torch.aten.unsqueeze %490, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2138 = torch.aten.unsqueeze %2137, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2139 = torch.aten.unsqueeze %2138, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2140 = torch.aten.mul.Tensor %2133, %2139 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2141 = torch.aten.add.Tensor %2140, %2136, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2142 = torch.aten.sigmoid %2141 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2143 = torch.aten.mul.Tensor %2142, %2141 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2144 = torch.aten.convolution %2143, %489, %488, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2145 = torch.aten.sigmoid %715 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2146 = torch.aten.mul.Tensor %2145, %715 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2147 = torch.aten.transpose.int %487, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2148 = torch.aten.mm %2146, %2147 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2149 = torch.aten.mul.Scalar %486, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2150 = torch.aten.add.Tensor %2149, %2148, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> | |
%2151 = torch.aten.unsqueeze %2150, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16> | |
%2152 = torch.aten.unsqueeze %2151, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16> | |
%2153 = torch.aten.add.Tensor %2144, %2152, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2154 = torch.aten.view %2153, %1870 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> | |
%2155 = torch.aten.to.dtype %2154, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> | |
%2156 = torch.aten.sum.dim_IntList %2155, %721, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2157 = torch.aten.div.Scalar %2156, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2158 = torch.aten.sub.Tensor %2155, %2157, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> | |
%2159 = torch.aten.mul.Tensor %2158, %2158 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> | |
%2160 = torch.aten.sum.dim_IntList %2159, %721, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2161 = torch.aten.div.Scalar %2160, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2162 = torch.aten.to.dtype %2161, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2163 = torch.aten.sum.dim_IntList %2154, %721, %true, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2164 = torch.aten.div.Scalar %2163, %int10240 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2165 = torch.aten.add.Scalar %2162, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2166 = torch.aten.rsqrt %2165 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%2167 = torch.aten.sub.Tensor %2154, %2164, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,256],f16> | |
%2168 = torch.aten.mul.Tensor %2167, %2166 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,256],f16> | |
%2169 = torch.aten.view %2168, %1886 : !torch.vtensor<[2,32,40,256],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2170 = torch.aten.unsqueeze %485, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2171 = torch.aten.unsqueeze %2170, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2172 = torch.aten.unsqueeze %2171, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2173 = torch.aten.unsqueeze %484, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2174 = torch.aten.unsqueeze %2173, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2175 = torch.aten.unsqueeze %2174, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2176 = torch.aten.mul.Tensor %2169, %2175 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2177 = torch.aten.add.Tensor %2176, %2172, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2178 = torch.aten.sigmoid %2177 : !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2179 = torch.aten.mul.Tensor %2178, %2177 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2180 = torch.aten.convolution %2179, %483, %482, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2181 = torch.aten.add.Tensor %2117, %2180, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2182 = torch.aten.div.Scalar %2181, %float1.000000e00 : !torch.vtensor<[2,1280,16,16],f16>, !torch.float -> !torch.vtensor<[2,1280,16,16],f16> | |
%2183 = torch.aten.view %2182, %1870 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,256],f16> | |
%2184 = torch.aten.to.dtype %2183, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,256],f64> | |
%2185 = torch.aten.sum.dim_IntList %2184, %721, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2186 = torch.aten.div.Scalar %2185, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2187 = torch.aten.sub.Tensor %2184, %2186, %float1.000000e00 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,256],f64> | |
%2188 = torch.aten.mul.Tensor %2187, %2187 : !torch.vtensor<[2,32,40,256],f64>, !torch.vtensor<[2,32,40,256],f64> -> !torch.vtensor<[2,32,40,256],f64> | |
%2189 = torch.aten.sum.dim_IntList %2188, %721, %true, %none : !torch.vtensor<[2,32,40,256],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2190 = torch.aten.div.Scalar %2189, %int10240 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2191 = torch.aten.to.dtype %2190, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2192 = torch.aten.sum.dim_IntList %2183, %721, %true, %none : !torch.vtensor<[2,32,40,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2193 = torch.aten.div.Scalar %2192, %int10240 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2194 = torch.aten.add.Scalar %2191, %float9.999990e-07, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2195 = torch.aten.rsqrt %2194 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%2196 = torch.aten.sub.Tensor %2183, %2193, %int1 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,256],f16> | |
%2197 = torch.aten.mul.Tensor %2196, %2195 : !torch.vtensor<[2,32,40,256],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,256],f16> | |
%2198 = torch.aten.view %2197, %1886 : !torch.vtensor<[2,32,40,256],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2199 = torch.aten.unsqueeze %481, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2200 = torch.aten.unsqueeze %2199, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2201 = torch.aten.unsqueeze %2200, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2202 = torch.aten.unsqueeze %480, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2203 = torch.aten.unsqueeze %2202, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2204 = torch.aten.unsqueeze %2203, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2205 = torch.aten.mul.Tensor %2198, %2204 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2206 = torch.aten.add.Tensor %2205, %2201, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2207 = torch.aten.permute %2206, %811 : !torch.vtensor<[2,1280,16,16],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> | |
%2208 = torch.aten.view %2207, %1927 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2209 = torch.aten.transpose.int %479, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2210 = torch.aten.broadcast_to %2208, %1927 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2211 = torch.aten.view %2210, %1927 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2212 = torch.aten.broadcast_to %2209, %1932 : !torch.vtensor<[1280,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,1280],f16> | |
%2213 = torch.aten.view %2212, %1932 : !torch.vtensor<[2,1280,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,1280],f16> | |
%2214 = torch.aten.bmm %2211, %2213 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,1280,1280],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2215 = torch.aten.view %2214, %1927 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2216 = torch.aten.add.Tensor %2215, %478, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2217 = torch.aten.to.dtype %2216, %int7, %false, %false, %none : !torch.vtensor<[2,256,1280],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1280],f64> | |
%2218 = torch.aten.sum.dim_IntList %2217, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%2219 = torch.aten.div.Scalar %2218, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%2220 = torch.aten.sub.Tensor %2217, %2219, %float1.000000e00 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1],f64>, !torch.float -> !torch.vtensor<[2,256,1280],f64> | |
%2221 = torch.aten.mul.Tensor %2220, %2220 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1280],f64> -> !torch.vtensor<[2,256,1280],f64> | |
%2222 = torch.aten.sum.dim_IntList %2221, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%2223 = torch.aten.div.Scalar %2222, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%2224 = torch.aten.to.dtype %2223, %int5, %false, %false, %none : !torch.vtensor<[2,256,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%2225 = torch.aten.sum.dim_IntList %2216, %688, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%2226 = torch.aten.div.Scalar %2225, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%2227 = torch.aten.add.Scalar %2224, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%2228 = torch.aten.rsqrt %2227 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> | |
%2229 = torch.aten.sub.Tensor %2216, %2226, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2230 = torch.aten.mul.Tensor %2229, %2228 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2231 = torch.aten.mul.Tensor %2230, %477 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2232 = torch.aten.add.Tensor %2231, %476, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2233 = torch.aten.transpose.int %475, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2234 = torch.aten.view %2232, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2235 = torch.aten.mm %2234, %2233 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2236 = torch.aten.view %2235, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2237 = torch.aten.view %2236, %1959 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%2238 = torch.aten.permute %2237, %847 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%2239 = torch.aten.clone %2238, %int0 : !torch.vtensor<[2,20,256,64],f16>, !torch.int -> !torch.vtensor<[2,20,256,64],f16> | |
%2240 = torch.aten.view %2239, %1963 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[40,256,64],f16> | |
%2241 = torch.aten.transpose.int %474, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2242 = torch.aten.view %2232, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2243 = torch.aten.mm %2242, %2241 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2244 = torch.aten.view %2243, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2245 = torch.aten.transpose.int %473, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2246 = torch.aten.view %2232, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2247 = torch.aten.mm %2246, %2245 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2248 = torch.aten.view %2247, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2249 = torch.aten.view %2244, %1959 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%2250 = torch.aten.permute %2249, %847 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%2251 = torch.aten.clone %2250, %int0 : !torch.vtensor<[2,20,256,64],f16>, !torch.int -> !torch.vtensor<[2,20,256,64],f16> | |
%2252 = torch.aten.view %2251, %1963 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[40,256,64],f16> | |
%2253 = torch.aten.view %2248, %1959 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%2254 = torch.aten.permute %2253, %847 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%2255 = torch.aten.clone %2254, %int0 : !torch.vtensor<[2,20,256,64],f16>, !torch.int -> !torch.vtensor<[2,20,256,64],f16> | |
%2256 = torch.aten.view %2255, %1963 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[40,256,64],f16> | |
%2257 = torch.aten.empty.memory_format %1981, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[40,256,256],f32> | |
%2258 = torch.aten.transpose.int %2252, %int-1, %int-2 : !torch.vtensor<[40,256,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[40,64,256],f16> | |
%2259 = torch.aten.bmm %2240, %2258 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,64,256],f16> -> !torch.vtensor<[40,256,256],f16> | |
%2260 = torch.aten.mul.Scalar %2259, %float1.250000e-01 : !torch.vtensor<[40,256,256],f16>, !torch.float -> !torch.vtensor<[40,256,256],f16> | |
%2261 = torch.aten.to.dtype %2257, %int5, %false, %false, %none : !torch.vtensor<[40,256,256],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,256,256],f16> | |
%2262 = torch.aten.add.Tensor %2260, %2257, %int0 : !torch.vtensor<[40,256,256],f16>, !torch.vtensor<[40,256,256],f32>, !torch.int -> !torch.vtensor<[40,256,256],f16> | |
%values_18, %indices_19 = torch.aten.max.dim %2262, %int-1, %true : !torch.vtensor<[40,256,256],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,256,1],f16>, !torch.vtensor<[40,256,1],si64> | |
%2263 = torch.aten.sub.Tensor %2262, %values_18, %float1.000000e00 : !torch.vtensor<[40,256,256],f16>, !torch.vtensor<[40,256,1],f16>, !torch.float -> !torch.vtensor<[40,256,256],f16> | |
%2264 = torch.aten.exp %2263 : !torch.vtensor<[40,256,256],f16> -> !torch.vtensor<[40,256,256],f16> | |
%2265 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%2266 = torch.aten.sum.dim_IntList %2264, %2265, %true, %none : !torch.vtensor<[40,256,256],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[40,256,1],f16> | |
%2267 = torch.aten.div.Tensor %2264, %2266 : !torch.vtensor<[40,256,256],f16>, !torch.vtensor<[40,256,1],f16> -> !torch.vtensor<[40,256,256],f16> | |
%2268 = torch.aten.bmm %2267, %2256 : !torch.vtensor<[40,256,256],f16>, !torch.vtensor<[40,256,64],f16> -> !torch.vtensor<[40,256,64],f16> | |
%2269 = torch.aten.view %2268, %1994 : !torch.vtensor<[40,256,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%2270 = torch.aten.permute %2269, %847 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%2271 = torch.aten.clone %2270, %int0 : !torch.vtensor<[2,256,20,64],f16>, !torch.int -> !torch.vtensor<[2,256,20,64],f16> | |
%2272 = torch.aten.view %2271, %1927 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2273 = torch.aten.transpose.int %472, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2274 = torch.aten.view %2272, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2275 = torch.aten.mm %2274, %2273 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2276 = torch.aten.mul.Scalar %471, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2277 = torch.aten.add.Tensor %2276, %2275, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> | |
%2278 = torch.aten.view %2277, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2279 = torch.aten.add.Tensor %2278, %2216, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2280 = torch.aten.to.dtype %2279, %int7, %false, %false, %none : !torch.vtensor<[2,256,1280],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1280],f64> | |
%2281 = torch.aten.sum.dim_IntList %2280, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%2282 = torch.aten.div.Scalar %2281, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%2283 = torch.aten.sub.Tensor %2280, %2282, %float1.000000e00 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1],f64>, !torch.float -> !torch.vtensor<[2,256,1280],f64> | |
%2284 = torch.aten.mul.Tensor %2283, %2283 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1280],f64> -> !torch.vtensor<[2,256,1280],f64> | |
%2285 = torch.aten.sum.dim_IntList %2284, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%2286 = torch.aten.div.Scalar %2285, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%2287 = torch.aten.to.dtype %2286, %int5, %false, %false, %none : !torch.vtensor<[2,256,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%2288 = torch.aten.sum.dim_IntList %2279, %688, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%2289 = torch.aten.div.Scalar %2288, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%2290 = torch.aten.add.Scalar %2287, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%2291 = torch.aten.rsqrt %2290 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> | |
%2292 = torch.aten.sub.Tensor %2279, %2289, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2293 = torch.aten.mul.Tensor %2292, %2291 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2294 = torch.aten.mul.Tensor %2293, %470 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2295 = torch.aten.add.Tensor %2294, %469, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2296 = torch.aten.transpose.int %468, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2297 = torch.aten.view %2295, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2298 = torch.aten.mm %2297, %2296 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2299 = torch.aten.view %2298, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2300 = torch.aten.view %2299, %1959 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%2301 = torch.aten.permute %2300, %847 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%2302 = torch.aten.clone %2301, %int0 : !torch.vtensor<[2,20,256,64],f16>, !torch.int -> !torch.vtensor<[2,20,256,64],f16> | |
%2303 = torch.aten.view %2302, %1963 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[40,256,64],f16> | |
%2304 = torch.aten.transpose.int %467, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16> | |
%2305 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%2306 = torch.aten.mm %2305, %2304 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2307 = torch.aten.view %2306, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2308 = torch.aten.transpose.int %466, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16> | |
%2309 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%2310 = torch.aten.mm %2309, %2308 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2311 = torch.aten.view %2310, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2312 = torch.aten.view %2307, %2039 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2313 = torch.aten.permute %2312, %847 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2314 = torch.aten.clone %2313, %int0 : !torch.vtensor<[2,20,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,64,64],f16> | |
%2315 = torch.aten.view %2314, %2043 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,64],f16> | |
%2316 = torch.aten.view %2311, %2039 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2317 = torch.aten.permute %2316, %847 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2318 = torch.aten.clone %2317, %int0 : !torch.vtensor<[2,20,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,64,64],f16> | |
%2319 = torch.aten.view %2318, %2043 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,64],f16> | |
%2320 = torch.aten.empty.memory_format %1963, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[40,256,64],f32> | |
%2321 = torch.aten.transpose.int %2315, %int-1, %int-2 : !torch.vtensor<[40,64,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[40,64,64],f16> | |
%2322 = torch.aten.bmm %2303, %2321 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,64,64],f16> -> !torch.vtensor<[40,256,64],f16> | |
%2323 = torch.aten.mul.Scalar %2322, %float1.250000e-01 : !torch.vtensor<[40,256,64],f16>, !torch.float -> !torch.vtensor<[40,256,64],f16> | |
%2324 = torch.aten.to.dtype %2320, %int5, %false, %false, %none : !torch.vtensor<[40,256,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,256,64],f16> | |
%2325 = torch.aten.add.Tensor %2323, %2320, %int0 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,256,64],f32>, !torch.int -> !torch.vtensor<[40,256,64],f16> | |
%values_20, %indices_21 = torch.aten.max.dim %2325, %int-1, %true : !torch.vtensor<[40,256,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,256,1],f16>, !torch.vtensor<[40,256,1],si64> | |
%2326 = torch.aten.sub.Tensor %2325, %values_20, %float1.000000e00 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,256,1],f16>, !torch.float -> !torch.vtensor<[40,256,64],f16> | |
%2327 = torch.aten.exp %2326 : !torch.vtensor<[40,256,64],f16> -> !torch.vtensor<[40,256,64],f16> | |
%2328 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%2329 = torch.aten.sum.dim_IntList %2327, %2328, %true, %none : !torch.vtensor<[40,256,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[40,256,1],f16> | |
%2330 = torch.aten.div.Tensor %2327, %2329 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,256,1],f16> -> !torch.vtensor<[40,256,64],f16> | |
%2331 = torch.aten.bmm %2330, %2319 : !torch.vtensor<[40,256,64],f16>, !torch.vtensor<[40,64,64],f16> -> !torch.vtensor<[40,256,64],f16> | |
%2332 = torch.aten.view %2331, %1994 : !torch.vtensor<[40,256,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,256,64],f16> | |
%2333 = torch.aten.permute %2332, %847 : !torch.vtensor<[2,20,256,64],f16>, !torch.list<int> -> !torch.vtensor<[2,256,20,64],f16> | |
%2334 = torch.aten.clone %2333, %int0 : !torch.vtensor<[2,256,20,64],f16>, !torch.int -> !torch.vtensor<[2,256,20,64],f16> | |
%2335 = torch.aten.view %2334, %1927 : !torch.vtensor<[2,256,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2336 = torch.aten.transpose.int %465, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2337 = torch.aten.view %2335, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2338 = torch.aten.mm %2337, %2336 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2339 = torch.aten.mul.Scalar %464, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2340 = torch.aten.add.Tensor %2339, %2338, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> | |
%2341 = torch.aten.view %2340, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2342 = torch.aten.add.Tensor %2341, %2279, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2343 = torch.aten.to.dtype %2342, %int7, %false, %false, %none : !torch.vtensor<[2,256,1280],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1280],f64> | |
%2344 = torch.aten.sum.dim_IntList %2343, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%2345 = torch.aten.div.Scalar %2344, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%2346 = torch.aten.sub.Tensor %2343, %2345, %float1.000000e00 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1],f64>, !torch.float -> !torch.vtensor<[2,256,1280],f64> | |
%2347 = torch.aten.mul.Tensor %2346, %2346 : !torch.vtensor<[2,256,1280],f64>, !torch.vtensor<[2,256,1280],f64> -> !torch.vtensor<[2,256,1280],f64> | |
%2348 = torch.aten.sum.dim_IntList %2347, %688, %true, %none : !torch.vtensor<[2,256,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f64> | |
%2349 = torch.aten.div.Scalar %2348, %int1280 : !torch.vtensor<[2,256,1],f64>, !torch.int -> !torch.vtensor<[2,256,1],f64> | |
%2350 = torch.aten.to.dtype %2349, %int5, %false, %false, %none : !torch.vtensor<[2,256,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%2351 = torch.aten.sum.dim_IntList %2342, %688, %true, %none : !torch.vtensor<[2,256,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,256,1],f16> | |
%2352 = torch.aten.div.Scalar %2351, %int1280 : !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%2353 = torch.aten.add.Scalar %2350, %float1.000000e-05, %int1 : !torch.vtensor<[2,256,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,256,1],f16> | |
%2354 = torch.aten.rsqrt %2353 : !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1],f16> | |
%2355 = torch.aten.sub.Tensor %2342, %2352, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2356 = torch.aten.mul.Tensor %2355, %2354 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2357 = torch.aten.mul.Tensor %2356, %463 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,256,1280],f16> | |
%2358 = torch.aten.add.Tensor %2357, %462, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2359 = torch.aten.transpose.int %461, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16> | |
%2360 = torch.aten.view %2358, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2361 = torch.aten.mm %2360, %2359 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[512,10240],f16> | |
%2362 = torch.aten.mul.Scalar %460, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16> | |
%2363 = torch.aten.add.Tensor %2362, %2361, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[512,10240],f16>, !torch.int -> !torch.vtensor<[512,10240],f16> | |
%2364 = torch.aten.view %2363, %2093 : !torch.vtensor<[512,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,256,10240],f16> | |
%2365 = torch.aten.slice.Tensor %2364, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> | |
%2366 = torch.aten.slice.Tensor %2364, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,256,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,256,5120],f16> | |
%2367 = torch.aten.gelu %2366, %str : !torch.vtensor<[2,256,5120],f16>, !torch.str -> !torch.vtensor<[2,256,5120],f16> | |
%2368 = torch.aten.mul.Tensor %2365, %2367 : !torch.vtensor<[2,256,5120],f16>, !torch.vtensor<[2,256,5120],f16> -> !torch.vtensor<[2,256,5120],f16> | |
%2369 = torch.aten.transpose.int %459, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16> | |
%2370 = torch.aten.view %2368, %2100 : !torch.vtensor<[2,256,5120],f16>, !torch.list<int> -> !torch.vtensor<[512,5120],f16> | |
%2371 = torch.aten.mm %2370, %2369 : !torch.vtensor<[512,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2372 = torch.aten.mul.Scalar %458, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2373 = torch.aten.add.Tensor %2372, %2371, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> | |
%2374 = torch.aten.view %2373, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2375 = torch.aten.add.Tensor %2374, %2342, %int1 : !torch.vtensor<[2,256,1280],f16>, !torch.vtensor<[2,256,1280],f16>, !torch.int -> !torch.vtensor<[2,256,1280],f16> | |
%2376 = torch.aten.transpose.int %457, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2377 = torch.aten.view %2375, %1955 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[512,1280],f16> | |
%2378 = torch.aten.mm %2377, %2376 : !torch.vtensor<[512,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[512,1280],f16> | |
%2379 = torch.aten.mul.Scalar %456, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2380 = torch.aten.add.Tensor %2379, %2378, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[512,1280],f16>, !torch.int -> !torch.vtensor<[512,1280],f16> | |
%2381 = torch.aten.view %2380, %1927 : !torch.vtensor<[512,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,256,1280],f16> | |
%2382 = torch.aten.view %2381, %2113 : !torch.vtensor<[2,256,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,16,16,1280],f16> | |
%2383 = torch.aten.permute %2382, %1003 : !torch.vtensor<[2,16,16,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,16,16],f16> | |
%2384 = torch.aten.clone %2383, %int0 : !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2385 = torch.aten.add.Tensor %2384, %2182, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[2,1280,16,16],f16>, !torch.int -> !torch.vtensor<[2,1280,16,16],f16> | |
%2386 = torch.aten.convolution %2385, %455, %454, %1275, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,1280,16,16],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2387 = torch.prim.ListConstruct %int2, %int32, %int40, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2388 = torch.aten.view %2386, %2387 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16> | |
%2389 = torch.aten.to.dtype %2388, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64> | |
%2390 = torch.aten.sum.dim_IntList %2389, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2391 = torch.aten.div.Scalar %2390, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2392 = torch.aten.sub.Tensor %2389, %2391, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64> | |
%2393 = torch.aten.mul.Tensor %2392, %2392 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64> | |
%2394 = torch.aten.sum.dim_IntList %2393, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2395 = torch.aten.div.Scalar %2394, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2396 = torch.aten.to.dtype %2395, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2397 = torch.aten.sum.dim_IntList %2388, %721, %true, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2398 = torch.aten.div.Scalar %2397, %int2560 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2399 = torch.aten.add.Scalar %2396, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2400 = torch.aten.rsqrt %2399 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%2401 = torch.aten.sub.Tensor %2388, %2398, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,64],f16> | |
%2402 = torch.aten.mul.Tensor %2401, %2400 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,64],f16> | |
%2403 = torch.prim.ListConstruct %int2, %int1280, %int8, %int8 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2404 = torch.aten.view %2402, %2403 : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2405 = torch.aten.unsqueeze %453, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2406 = torch.aten.unsqueeze %2405, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2407 = torch.aten.unsqueeze %2406, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2408 = torch.aten.unsqueeze %452, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2409 = torch.aten.unsqueeze %2408, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2410 = torch.aten.unsqueeze %2409, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2411 = torch.aten.mul.Tensor %2404, %2410 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2412 = torch.aten.add.Tensor %2411, %2407, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2413 = torch.aten.sigmoid %2412 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2414 = torch.aten.mul.Tensor %2413, %2412 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2415 = torch.aten.convolution %2414, %451, %450, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2416 = torch.aten.sigmoid %715 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2417 = torch.aten.mul.Tensor %2416, %715 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2418 = torch.aten.transpose.int %449, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2419 = torch.aten.mm %2417, %2418 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2420 = torch.aten.mul.Scalar %448, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2421 = torch.aten.add.Tensor %2420, %2419, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> | |
%2422 = torch.aten.unsqueeze %2421, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16> | |
%2423 = torch.aten.unsqueeze %2422, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16> | |
%2424 = torch.aten.add.Tensor %2415, %2423, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2425 = torch.aten.view %2424, %2387 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16> | |
%2426 = torch.aten.to.dtype %2425, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64> | |
%2427 = torch.aten.sum.dim_IntList %2426, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2428 = torch.aten.div.Scalar %2427, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2429 = torch.aten.sub.Tensor %2426, %2428, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64> | |
%2430 = torch.aten.mul.Tensor %2429, %2429 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64> | |
%2431 = torch.aten.sum.dim_IntList %2430, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2432 = torch.aten.div.Scalar %2431, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2433 = torch.aten.to.dtype %2432, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2434 = torch.aten.sum.dim_IntList %2425, %721, %true, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2435 = torch.aten.div.Scalar %2434, %int2560 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2436 = torch.aten.add.Scalar %2433, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2437 = torch.aten.rsqrt %2436 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%2438 = torch.aten.sub.Tensor %2425, %2435, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,64],f16> | |
%2439 = torch.aten.mul.Tensor %2438, %2437 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,64],f16> | |
%2440 = torch.aten.view %2439, %2403 : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2441 = torch.aten.unsqueeze %447, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2442 = torch.aten.unsqueeze %2441, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2443 = torch.aten.unsqueeze %2442, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2444 = torch.aten.unsqueeze %446, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2445 = torch.aten.unsqueeze %2444, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2446 = torch.aten.unsqueeze %2445, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2447 = torch.aten.mul.Tensor %2440, %2446 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2448 = torch.aten.add.Tensor %2447, %2443, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2449 = torch.aten.sigmoid %2448 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2450 = torch.aten.mul.Tensor %2449, %2448 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2451 = torch.aten.convolution %2450, %445, %444, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2452 = torch.aten.add.Tensor %2386, %2451, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2453 = torch.aten.div.Scalar %2452, %float1.000000e00 : !torch.vtensor<[2,1280,8,8],f16>, !torch.float -> !torch.vtensor<[2,1280,8,8],f16> | |
%2454 = torch.aten.view %2453, %2387 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16> | |
%2455 = torch.aten.to.dtype %2454, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64> | |
%2456 = torch.aten.sum.dim_IntList %2455, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2457 = torch.aten.div.Scalar %2456, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2458 = torch.aten.sub.Tensor %2455, %2457, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64> | |
%2459 = torch.aten.mul.Tensor %2458, %2458 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64> | |
%2460 = torch.aten.sum.dim_IntList %2459, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2461 = torch.aten.div.Scalar %2460, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2462 = torch.aten.to.dtype %2461, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2463 = torch.aten.sum.dim_IntList %2454, %721, %true, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2464 = torch.aten.div.Scalar %2463, %int2560 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2465 = torch.aten.add.Scalar %2462, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2466 = torch.aten.rsqrt %2465 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%2467 = torch.aten.sub.Tensor %2454, %2464, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,64],f16> | |
%2468 = torch.aten.mul.Tensor %2467, %2466 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,64],f16> | |
%2469 = torch.aten.view %2468, %2403 : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2470 = torch.aten.unsqueeze %443, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2471 = torch.aten.unsqueeze %2470, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2472 = torch.aten.unsqueeze %2471, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2473 = torch.aten.unsqueeze %442, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2474 = torch.aten.unsqueeze %2473, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2475 = torch.aten.unsqueeze %2474, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2476 = torch.aten.mul.Tensor %2469, %2475 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2477 = torch.aten.add.Tensor %2476, %2472, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2478 = torch.aten.sigmoid %2477 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2479 = torch.aten.mul.Tensor %2478, %2477 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2480 = torch.aten.convolution %2479, %441, %440, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2481 = torch.aten.sigmoid %715 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2482 = torch.aten.mul.Tensor %2481, %715 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2483 = torch.aten.transpose.int %439, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2484 = torch.aten.mm %2482, %2483 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2485 = torch.aten.mul.Scalar %438, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2486 = torch.aten.add.Tensor %2485, %2484, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> | |
%2487 = torch.aten.unsqueeze %2486, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16> | |
%2488 = torch.aten.unsqueeze %2487, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16> | |
%2489 = torch.aten.add.Tensor %2480, %2488, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2490 = torch.aten.view %2489, %2387 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16> | |
%2491 = torch.aten.to.dtype %2490, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64> | |
%2492 = torch.aten.sum.dim_IntList %2491, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2493 = torch.aten.div.Scalar %2492, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2494 = torch.aten.sub.Tensor %2491, %2493, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64> | |
%2495 = torch.aten.mul.Tensor %2494, %2494 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64> | |
%2496 = torch.aten.sum.dim_IntList %2495, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2497 = torch.aten.div.Scalar %2496, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2498 = torch.aten.to.dtype %2497, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2499 = torch.aten.sum.dim_IntList %2490, %721, %true, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2500 = torch.aten.div.Scalar %2499, %int2560 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2501 = torch.aten.add.Scalar %2498, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2502 = torch.aten.rsqrt %2501 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%2503 = torch.aten.sub.Tensor %2490, %2500, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,64],f16> | |
%2504 = torch.aten.mul.Tensor %2503, %2502 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,64],f16> | |
%2505 = torch.aten.view %2504, %2403 : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2506 = torch.aten.unsqueeze %437, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2507 = torch.aten.unsqueeze %2506, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2508 = torch.aten.unsqueeze %2507, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2509 = torch.aten.unsqueeze %436, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2510 = torch.aten.unsqueeze %2509, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2511 = torch.aten.unsqueeze %2510, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2512 = torch.aten.mul.Tensor %2505, %2511 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2513 = torch.aten.add.Tensor %2512, %2508, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2514 = torch.aten.sigmoid %2513 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2515 = torch.aten.mul.Tensor %2514, %2513 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2516 = torch.aten.convolution %2515, %435, %434, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2517 = torch.aten.add.Tensor %2453, %2516, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2518 = torch.aten.div.Scalar %2517, %float1.000000e00 : !torch.vtensor<[2,1280,8,8],f16>, !torch.float -> !torch.vtensor<[2,1280,8,8],f16> | |
%2519 = torch.aten.view %2518, %2387 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16> | |
%2520 = torch.aten.to.dtype %2519, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64> | |
%2521 = torch.aten.sum.dim_IntList %2520, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2522 = torch.aten.div.Scalar %2521, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2523 = torch.aten.sub.Tensor %2520, %2522, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64> | |
%2524 = torch.aten.mul.Tensor %2523, %2523 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64> | |
%2525 = torch.aten.sum.dim_IntList %2524, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2526 = torch.aten.div.Scalar %2525, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2527 = torch.aten.to.dtype %2526, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2528 = torch.aten.sum.dim_IntList %2519, %721, %true, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2529 = torch.aten.div.Scalar %2528, %int2560 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2530 = torch.aten.add.Scalar %2527, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2531 = torch.aten.rsqrt %2530 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%2532 = torch.aten.sub.Tensor %2519, %2529, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,64],f16> | |
%2533 = torch.aten.mul.Tensor %2532, %2531 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,64],f16> | |
%2534 = torch.aten.view %2533, %2403 : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2535 = torch.aten.unsqueeze %433, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2536 = torch.aten.unsqueeze %2535, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2537 = torch.aten.unsqueeze %2536, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2538 = torch.aten.unsqueeze %432, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2539 = torch.aten.unsqueeze %2538, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2540 = torch.aten.unsqueeze %2539, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2541 = torch.aten.mul.Tensor %2534, %2540 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2542 = torch.aten.add.Tensor %2541, %2537, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2543 = torch.aten.sigmoid %2542 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2544 = torch.aten.mul.Tensor %2543, %2542 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2545 = torch.aten.convolution %2544, %431, %430, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2546 = torch.aten.sigmoid %715 : !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2547 = torch.aten.mul.Tensor %2546, %715 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[2,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2548 = torch.aten.transpose.int %429, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2549 = torch.aten.mm %2547, %2548 : !torch.vtensor<[2,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[2,1280],f16> | |
%2550 = torch.aten.mul.Scalar %428, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2551 = torch.aten.add.Tensor %2550, %2549, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280],f16> | |
%2552 = torch.aten.unsqueeze %2551, %int2 : !torch.vtensor<[2,1280],f16>, !torch.int -> !torch.vtensor<[2,1280,1],f16> | |
%2553 = torch.aten.unsqueeze %2552, %int3 : !torch.vtensor<[2,1280,1],f16>, !torch.int -> !torch.vtensor<[2,1280,1,1],f16> | |
%2554 = torch.aten.add.Tensor %2545, %2553, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2555 = torch.aten.view %2554, %2387 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16> | |
%2556 = torch.aten.to.dtype %2555, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64> | |
%2557 = torch.aten.sum.dim_IntList %2556, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2558 = torch.aten.div.Scalar %2557, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2559 = torch.aten.sub.Tensor %2556, %2558, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64> | |
%2560 = torch.aten.mul.Tensor %2559, %2559 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64> | |
%2561 = torch.aten.sum.dim_IntList %2560, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2562 = torch.aten.div.Scalar %2561, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2563 = torch.aten.to.dtype %2562, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2564 = torch.aten.sum.dim_IntList %2555, %721, %true, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2565 = torch.aten.div.Scalar %2564, %int2560 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2566 = torch.aten.add.Scalar %2563, %float1.000000e-05, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2567 = torch.aten.rsqrt %2566 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%2568 = torch.aten.sub.Tensor %2555, %2565, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,64],f16> | |
%2569 = torch.aten.mul.Tensor %2568, %2567 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,64],f16> | |
%2570 = torch.aten.view %2569, %2403 : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2571 = torch.aten.unsqueeze %427, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2572 = torch.aten.unsqueeze %2571, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2573 = torch.aten.unsqueeze %2572, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2574 = torch.aten.unsqueeze %426, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2575 = torch.aten.unsqueeze %2574, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2576 = torch.aten.unsqueeze %2575, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2577 = torch.aten.mul.Tensor %2570, %2576 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2578 = torch.aten.add.Tensor %2577, %2573, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2579 = torch.aten.sigmoid %2578 : !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2580 = torch.aten.mul.Tensor %2579, %2578 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2581 = torch.aten.convolution %2580, %425, %424, %716, %716, %716, %false, %717, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1280,1280,3,3],f16>, !torch.vtensor<[1280],f16>, !torch.list<int>, !torch.list<int>, !torch.list<int>, !torch.bool, !torch.list<int>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2582 = torch.aten.add.Tensor %2518, %2581, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2583 = torch.aten.div.Scalar %2582, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2584 = torch.aten.view %2583, %2387 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,32,40,64],f16> | |
%2585 = torch.aten.to.dtype %2584, %int7, %false, %false, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,40,64],f64> | |
%2586 = torch.aten.sum.dim_IntList %2585, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2587 = torch.aten.div.Scalar %2586, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2588 = torch.aten.sub.Tensor %2585, %2587, %float1.000000e00 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,1,1],f64>, !torch.float -> !torch.vtensor<[2,32,40,64],f64> | |
%2589 = torch.aten.mul.Tensor %2588, %2588 : !torch.vtensor<[2,32,40,64],f64>, !torch.vtensor<[2,32,40,64],f64> -> !torch.vtensor<[2,32,40,64],f64> | |
%2590 = torch.aten.sum.dim_IntList %2589, %721, %true, %none : !torch.vtensor<[2,32,40,64],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f64> | |
%2591 = torch.aten.div.Scalar %2590, %int2560 : !torch.vtensor<[2,32,1,1],f64>, !torch.int -> !torch.vtensor<[2,32,1,1],f64> | |
%2592 = torch.aten.to.dtype %2591, %int5, %false, %false, %none : !torch.vtensor<[2,32,1,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2593 = torch.aten.sum.dim_IntList %2584, %721, %true, %none : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,32,1,1],f16> | |
%2594 = torch.aten.div.Scalar %2593, %int2560 : !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2595 = torch.aten.add.Scalar %2592, %float9.999990e-07, %int1 : !torch.vtensor<[2,32,1,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,32,1,1],f16> | |
%2596 = torch.aten.rsqrt %2595 : !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,1,1],f16> | |
%2597 = torch.aten.sub.Tensor %2584, %2594, %int1 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16>, !torch.int -> !torch.vtensor<[2,32,40,64],f16> | |
%2598 = torch.aten.mul.Tensor %2597, %2596 : !torch.vtensor<[2,32,40,64],f16>, !torch.vtensor<[2,32,1,1],f16> -> !torch.vtensor<[2,32,40,64],f16> | |
%2599 = torch.aten.view %2598, %2403 : !torch.vtensor<[2,32,40,64],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2600 = torch.aten.unsqueeze %423, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2601 = torch.aten.unsqueeze %2600, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2602 = torch.aten.unsqueeze %2601, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2603 = torch.aten.unsqueeze %422, %int0 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1,1280],f16> | |
%2604 = torch.aten.unsqueeze %2603, %int2 : !torch.vtensor<[1,1280],f16>, !torch.int -> !torch.vtensor<[1,1280,1],f16> | |
%2605 = torch.aten.unsqueeze %2604, %int3 : !torch.vtensor<[1,1280,1],f16>, !torch.int -> !torch.vtensor<[1,1280,1,1],f16> | |
%2606 = torch.aten.mul.Tensor %2599, %2605 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2607 = torch.aten.add.Tensor %2606, %2602, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[1,1280,1,1],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2608 = torch.aten.permute %2607, %811 : !torch.vtensor<[2,1280,8,8],f16>, !torch.list<int> -> !torch.vtensor<[2,8,8,1280],f16> | |
%2609 = torch.aten.view %2608, %2033 : !torch.vtensor<[2,8,8,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2610 = torch.aten.transpose.int %421, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2611 = torch.aten.broadcast_to %2609, %2033 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2612 = torch.aten.view %2611, %2033 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2613 = torch.aten.broadcast_to %2610, %1932 : !torch.vtensor<[1280,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,1280],f16> | |
%2614 = torch.aten.view %2613, %1932 : !torch.vtensor<[2,1280,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,1280],f16> | |
%2615 = torch.aten.bmm %2612, %2614 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,1280,1280],f16> -> !torch.vtensor<[2,64,1280],f16> | |
%2616 = torch.aten.view %2615, %2033 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2617 = torch.aten.add.Tensor %2616, %420, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16> | |
%2618 = torch.aten.to.dtype %2617, %int7, %false, %false, %none : !torch.vtensor<[2,64,1280],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1280],f64> | |
%2619 = torch.aten.sum.dim_IntList %2618, %688, %true, %none : !torch.vtensor<[2,64,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f64> | |
%2620 = torch.aten.div.Scalar %2619, %int1280 : !torch.vtensor<[2,64,1],f64>, !torch.int -> !torch.vtensor<[2,64,1],f64> | |
%2621 = torch.aten.sub.Tensor %2618, %2620, %float1.000000e00 : !torch.vtensor<[2,64,1280],f64>, !torch.vtensor<[2,64,1],f64>, !torch.float -> !torch.vtensor<[2,64,1280],f64> | |
%2622 = torch.aten.mul.Tensor %2621, %2621 : !torch.vtensor<[2,64,1280],f64>, !torch.vtensor<[2,64,1280],f64> -> !torch.vtensor<[2,64,1280],f64> | |
%2623 = torch.aten.sum.dim_IntList %2622, %688, %true, %none : !torch.vtensor<[2,64,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f64> | |
%2624 = torch.aten.div.Scalar %2623, %int1280 : !torch.vtensor<[2,64,1],f64>, !torch.int -> !torch.vtensor<[2,64,1],f64> | |
%2625 = torch.aten.to.dtype %2624, %int5, %false, %false, %none : !torch.vtensor<[2,64,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16> | |
%2626 = torch.aten.sum.dim_IntList %2617, %688, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16> | |
%2627 = torch.aten.div.Scalar %2626, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16> | |
%2628 = torch.aten.add.Scalar %2625, %float1.000000e-05, %int1 : !torch.vtensor<[2,64,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,64,1],f16> | |
%2629 = torch.aten.rsqrt %2628 : !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1],f16> | |
%2630 = torch.aten.sub.Tensor %2617, %2627, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16> | |
%2631 = torch.aten.mul.Tensor %2630, %2629 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1280],f16> | |
%2632 = torch.aten.mul.Tensor %2631, %419 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,64,1280],f16> | |
%2633 = torch.aten.add.Tensor %2632, %418, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16> | |
%2634 = torch.aten.transpose.int %417, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2635 = torch.prim.ListConstruct %int128, %int1280 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2636 = torch.aten.view %2633, %2635 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16> | |
%2637 = torch.aten.mm %2636, %2634 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2638 = torch.aten.view %2637, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2639 = torch.aten.view %2638, %2039 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2640 = torch.aten.permute %2639, %847 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2641 = torch.aten.clone %2640, %int0 : !torch.vtensor<[2,20,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,64,64],f16> | |
%2642 = torch.aten.view %2641, %2043 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,64],f16> | |
%2643 = torch.aten.transpose.int %416, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2644 = torch.aten.view %2633, %2635 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16> | |
%2645 = torch.aten.mm %2644, %2643 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2646 = torch.aten.view %2645, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2647 = torch.aten.transpose.int %415, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2648 = torch.aten.view %2633, %2635 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16> | |
%2649 = torch.aten.mm %2648, %2647 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2650 = torch.aten.view %2649, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2651 = torch.aten.view %2646, %2039 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2652 = torch.aten.permute %2651, %847 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2653 = torch.aten.clone %2652, %int0 : !torch.vtensor<[2,20,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,64,64],f16> | |
%2654 = torch.aten.view %2653, %2043 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,64],f16> | |
%2655 = torch.aten.view %2650, %2039 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2656 = torch.aten.permute %2655, %847 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2657 = torch.aten.clone %2656, %int0 : !torch.vtensor<[2,20,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,64,64],f16> | |
%2658 = torch.aten.view %2657, %2043 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,64],f16> | |
%2659 = torch.aten.empty.memory_format %2043, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[40,64,64],f32> | |
%2660 = torch.aten.transpose.int %2654, %int-1, %int-2 : !torch.vtensor<[40,64,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[40,64,64],f16> | |
%2661 = torch.aten.bmm %2642, %2660 : !torch.vtensor<[40,64,64],f16>, !torch.vtensor<[40,64,64],f16> -> !torch.vtensor<[40,64,64],f16> | |
%2662 = torch.aten.mul.Scalar %2661, %float1.250000e-01 : !torch.vtensor<[40,64,64],f16>, !torch.float -> !torch.vtensor<[40,64,64],f16> | |
%2663 = torch.aten.to.dtype %2659, %int5, %false, %false, %none : !torch.vtensor<[40,64,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,64,64],f16> | |
%2664 = torch.aten.add.Tensor %2662, %2659, %int0 : !torch.vtensor<[40,64,64],f16>, !torch.vtensor<[40,64,64],f32>, !torch.int -> !torch.vtensor<[40,64,64],f16> | |
%values_22, %indices_23 = torch.aten.max.dim %2664, %int-1, %true : !torch.vtensor<[40,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,64,1],f16>, !torch.vtensor<[40,64,1],si64> | |
%2665 = torch.aten.sub.Tensor %2664, %values_22, %float1.000000e00 : !torch.vtensor<[40,64,64],f16>, !torch.vtensor<[40,64,1],f16>, !torch.float -> !torch.vtensor<[40,64,64],f16> | |
%2666 = torch.aten.exp %2665 : !torch.vtensor<[40,64,64],f16> -> !torch.vtensor<[40,64,64],f16> | |
%2667 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%2668 = torch.aten.sum.dim_IntList %2666, %2667, %true, %none : !torch.vtensor<[40,64,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[40,64,1],f16> | |
%2669 = torch.aten.div.Tensor %2666, %2668 : !torch.vtensor<[40,64,64],f16>, !torch.vtensor<[40,64,1],f16> -> !torch.vtensor<[40,64,64],f16> | |
%2670 = torch.aten.bmm %2669, %2658 : !torch.vtensor<[40,64,64],f16>, !torch.vtensor<[40,64,64],f16> -> !torch.vtensor<[40,64,64],f16> | |
%2671 = torch.prim.ListConstruct %int2, %int20, %int64, %int64 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2672 = torch.aten.view %2670, %2671 : !torch.vtensor<[40,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2673 = torch.aten.permute %2672, %847 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2674 = torch.aten.clone %2673, %int0 : !torch.vtensor<[2,64,20,64],f16>, !torch.int -> !torch.vtensor<[2,64,20,64],f16> | |
%2675 = torch.aten.view %2674, %2033 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2676 = torch.aten.transpose.int %414, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2677 = torch.aten.view %2675, %2635 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16> | |
%2678 = torch.aten.mm %2677, %2676 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2679 = torch.aten.mul.Scalar %413, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2680 = torch.aten.add.Tensor %2679, %2678, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.int -> !torch.vtensor<[128,1280],f16> | |
%2681 = torch.aten.view %2680, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2682 = torch.aten.add.Tensor %2681, %2617, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16> | |
%2683 = torch.aten.to.dtype %2682, %int7, %false, %false, %none : !torch.vtensor<[2,64,1280],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1280],f64> | |
%2684 = torch.aten.sum.dim_IntList %2683, %688, %true, %none : !torch.vtensor<[2,64,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f64> | |
%2685 = torch.aten.div.Scalar %2684, %int1280 : !torch.vtensor<[2,64,1],f64>, !torch.int -> !torch.vtensor<[2,64,1],f64> | |
%2686 = torch.aten.sub.Tensor %2683, %2685, %float1.000000e00 : !torch.vtensor<[2,64,1280],f64>, !torch.vtensor<[2,64,1],f64>, !torch.float -> !torch.vtensor<[2,64,1280],f64> | |
%2687 = torch.aten.mul.Tensor %2686, %2686 : !torch.vtensor<[2,64,1280],f64>, !torch.vtensor<[2,64,1280],f64> -> !torch.vtensor<[2,64,1280],f64> | |
%2688 = torch.aten.sum.dim_IntList %2687, %688, %true, %none : !torch.vtensor<[2,64,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f64> | |
%2689 = torch.aten.div.Scalar %2688, %int1280 : !torch.vtensor<[2,64,1],f64>, !torch.int -> !torch.vtensor<[2,64,1],f64> | |
%2690 = torch.aten.to.dtype %2689, %int5, %false, %false, %none : !torch.vtensor<[2,64,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16> | |
%2691 = torch.aten.sum.dim_IntList %2682, %688, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16> | |
%2692 = torch.aten.div.Scalar %2691, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16> | |
%2693 = torch.aten.add.Scalar %2690, %float1.000000e-05, %int1 : !torch.vtensor<[2,64,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,64,1],f16> | |
%2694 = torch.aten.rsqrt %2693 : !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1],f16> | |
%2695 = torch.aten.sub.Tensor %2682, %2692, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16> | |
%2696 = torch.aten.mul.Tensor %2695, %2694 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1280],f16> | |
%2697 = torch.aten.mul.Tensor %2696, %412 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,64,1280],f16> | |
%2698 = torch.aten.add.Tensor %2697, %411, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16> | |
%2699 = torch.aten.transpose.int %410, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2700 = torch.aten.view %2698, %2635 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16> | |
%2701 = torch.aten.mm %2700, %2699 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2702 = torch.aten.view %2701, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2703 = torch.aten.view %2702, %2039 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2704 = torch.aten.permute %2703, %847 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2705 = torch.aten.clone %2704, %int0 : !torch.vtensor<[2,20,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,64,64],f16> | |
%2706 = torch.aten.view %2705, %2043 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,64],f16> | |
%2707 = torch.aten.transpose.int %409, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16> | |
%2708 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%2709 = torch.aten.mm %2708, %2707 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2710 = torch.aten.view %2709, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2711 = torch.aten.transpose.int %408, %int0, %int1 : !torch.vtensor<[1280,1024],f16>, !torch.int, !torch.int -> !torch.vtensor<[1024,1280],f16> | |
%2712 = torch.aten.view %arg2, %918 : !torch.vtensor<[2,64,1024],f16>, !torch.list<int> -> !torch.vtensor<[128,1024],f16> | |
%2713 = torch.aten.mm %2712, %2711 : !torch.vtensor<[128,1024],f16>, !torch.vtensor<[1024,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2714 = torch.aten.view %2713, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2715 = torch.aten.view %2710, %2039 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2716 = torch.aten.permute %2715, %847 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2717 = torch.aten.clone %2716, %int0 : !torch.vtensor<[2,20,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,64,64],f16> | |
%2718 = torch.aten.view %2717, %2043 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,64],f16> | |
%2719 = torch.aten.view %2714, %2039 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2720 = torch.aten.permute %2719, %847 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2721 = torch.aten.clone %2720, %int0 : !torch.vtensor<[2,20,64,64],f16>, !torch.int -> !torch.vtensor<[2,20,64,64],f16> | |
%2722 = torch.aten.view %2721, %2043 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[40,64,64],f16> | |
%2723 = torch.aten.empty.memory_format %2043, %int6, %none, %cpu, %false, %none : !torch.list<int>, !torch.int, !torch.none, !torch.Device, !torch.bool, !torch.none -> !torch.vtensor<[40,64,64],f32> | |
%2724 = torch.aten.transpose.int %2718, %int-1, %int-2 : !torch.vtensor<[40,64,64],f16>, !torch.int, !torch.int -> !torch.vtensor<[40,64,64],f16> | |
%2725 = torch.aten.bmm %2706, %2724 : !torch.vtensor<[40,64,64],f16>, !torch.vtensor<[40,64,64],f16> -> !torch.vtensor<[40,64,64],f16> | |
%2726 = torch.aten.mul.Scalar %2725, %float1.250000e-01 : !torch.vtensor<[40,64,64],f16>, !torch.float -> !torch.vtensor<[40,64,64],f16> | |
%2727 = torch.aten.to.dtype %2723, %int5, %false, %false, %none : !torch.vtensor<[40,64,64],f32>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[40,64,64],f16> | |
%2728 = torch.aten.add.Tensor %2726, %2723, %int0 : !torch.vtensor<[40,64,64],f16>, !torch.vtensor<[40,64,64],f32>, !torch.int -> !torch.vtensor<[40,64,64],f16> | |
%values_24, %indices_25 = torch.aten.max.dim %2728, %int-1, %true : !torch.vtensor<[40,64,64],f16>, !torch.int, !torch.bool -> !torch.vtensor<[40,64,1],f16>, !torch.vtensor<[40,64,1],si64> | |
%2729 = torch.aten.sub.Tensor %2728, %values_24, %float1.000000e00 : !torch.vtensor<[40,64,64],f16>, !torch.vtensor<[40,64,1],f16>, !torch.float -> !torch.vtensor<[40,64,64],f16> | |
%2730 = torch.aten.exp %2729 : !torch.vtensor<[40,64,64],f16> -> !torch.vtensor<[40,64,64],f16> | |
%2731 = torch.prim.ListConstruct %int-1 : (!torch.int) -> !torch.list<int> | |
%2732 = torch.aten.sum.dim_IntList %2730, %2731, %true, %none : !torch.vtensor<[40,64,64],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[40,64,1],f16> | |
%2733 = torch.aten.div.Tensor %2730, %2732 : !torch.vtensor<[40,64,64],f16>, !torch.vtensor<[40,64,1],f16> -> !torch.vtensor<[40,64,64],f16> | |
%2734 = torch.aten.bmm %2733, %2722 : !torch.vtensor<[40,64,64],f16>, !torch.vtensor<[40,64,64],f16> -> !torch.vtensor<[40,64,64],f16> | |
%2735 = torch.aten.view %2734, %2671 : !torch.vtensor<[40,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,20,64,64],f16> | |
%2736 = torch.aten.permute %2735, %847 : !torch.vtensor<[2,20,64,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,20,64],f16> | |
%2737 = torch.aten.clone %2736, %int0 : !torch.vtensor<[2,64,20,64],f16>, !torch.int -> !torch.vtensor<[2,64,20,64],f16> | |
%2738 = torch.aten.view %2737, %2033 : !torch.vtensor<[2,64,20,64],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2739 = torch.aten.transpose.int %407, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2740 = torch.aten.view %2738, %2635 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16> | |
%2741 = torch.aten.mm %2740, %2739 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2742 = torch.aten.mul.Scalar %406, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2743 = torch.aten.add.Tensor %2742, %2741, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.int -> !torch.vtensor<[128,1280],f16> | |
%2744 = torch.aten.view %2743, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2745 = torch.aten.add.Tensor %2744, %2682, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16> | |
%2746 = torch.aten.to.dtype %2745, %int7, %false, %false, %none : !torch.vtensor<[2,64,1280],f16>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1280],f64> | |
%2747 = torch.aten.sum.dim_IntList %2746, %688, %true, %none : !torch.vtensor<[2,64,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f64> | |
%2748 = torch.aten.div.Scalar %2747, %int1280 : !torch.vtensor<[2,64,1],f64>, !torch.int -> !torch.vtensor<[2,64,1],f64> | |
%2749 = torch.aten.sub.Tensor %2746, %2748, %float1.000000e00 : !torch.vtensor<[2,64,1280],f64>, !torch.vtensor<[2,64,1],f64>, !torch.float -> !torch.vtensor<[2,64,1280],f64> | |
%2750 = torch.aten.mul.Tensor %2749, %2749 : !torch.vtensor<[2,64,1280],f64>, !torch.vtensor<[2,64,1280],f64> -> !torch.vtensor<[2,64,1280],f64> | |
%2751 = torch.aten.sum.dim_IntList %2750, %688, %true, %none : !torch.vtensor<[2,64,1280],f64>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f64> | |
%2752 = torch.aten.div.Scalar %2751, %int1280 : !torch.vtensor<[2,64,1],f64>, !torch.int -> !torch.vtensor<[2,64,1],f64> | |
%2753 = torch.aten.to.dtype %2752, %int5, %false, %false, %none : !torch.vtensor<[2,64,1],f64>, !torch.int, !torch.bool, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16> | |
%2754 = torch.aten.sum.dim_IntList %2745, %688, %true, %none : !torch.vtensor<[2,64,1280],f16>, !torch.list<int>, !torch.bool, !torch.none -> !torch.vtensor<[2,64,1],f16> | |
%2755 = torch.aten.div.Scalar %2754, %int1280 : !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1],f16> | |
%2756 = torch.aten.add.Scalar %2753, %float1.000000e-05, %int1 : !torch.vtensor<[2,64,1],f16>, !torch.float, !torch.int -> !torch.vtensor<[2,64,1],f16> | |
%2757 = torch.aten.rsqrt %2756 : !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1],f16> | |
%2758 = torch.aten.sub.Tensor %2745, %2755, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16> | |
%2759 = torch.aten.mul.Tensor %2758, %2757 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1],f16> -> !torch.vtensor<[2,64,1280],f16> | |
%2760 = torch.aten.mul.Tensor %2759, %405 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16> -> !torch.vtensor<[2,64,1280],f16> | |
%2761 = torch.aten.add.Tensor %2760, %404, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16> | |
%2762 = torch.aten.transpose.int %403, %int0, %int1 : !torch.vtensor<[10240,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,10240],f16> | |
%2763 = torch.aten.view %2761, %2635 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16> | |
%2764 = torch.aten.mm %2763, %2762 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,10240],f16> -> !torch.vtensor<[128,10240],f16> | |
%2765 = torch.aten.mul.Scalar %402, %int1 : !torch.vtensor<[10240],f16>, !torch.int -> !torch.vtensor<[10240],f16> | |
%2766 = torch.aten.add.Tensor %2765, %2764, %int1 : !torch.vtensor<[10240],f16>, !torch.vtensor<[128,10240],f16>, !torch.int -> !torch.vtensor<[128,10240],f16> | |
%2767 = torch.prim.ListConstruct %int2, %int64, %int10240 : (!torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2768 = torch.aten.view %2766, %2767 : !torch.vtensor<[128,10240],f16>, !torch.list<int> -> !torch.vtensor<[2,64,10240],f16> | |
%2769 = torch.aten.slice.Tensor %2768, %int-1, %int0, %int5120, %int1 : !torch.vtensor<[2,64,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,64,5120],f16> | |
%2770 = torch.aten.slice.Tensor %2768, %int-1, %int5120, %int10240, %int1 : !torch.vtensor<[2,64,10240],f16>, !torch.int, !torch.int, !torch.int, !torch.int -> !torch.vtensor<[2,64,5120],f16> | |
%2771 = torch.aten.gelu %2770, %str : !torch.vtensor<[2,64,5120],f16>, !torch.str -> !torch.vtensor<[2,64,5120],f16> | |
%2772 = torch.aten.mul.Tensor %2769, %2771 : !torch.vtensor<[2,64,5120],f16>, !torch.vtensor<[2,64,5120],f16> -> !torch.vtensor<[2,64,5120],f16> | |
%2773 = torch.aten.transpose.int %401, %int0, %int1 : !torch.vtensor<[1280,5120],f16>, !torch.int, !torch.int -> !torch.vtensor<[5120,1280],f16> | |
%2774 = torch.prim.ListConstruct %int128, %int5120 : (!torch.int, !torch.int) -> !torch.list<int> | |
%2775 = torch.aten.view %2772, %2774 : !torch.vtensor<[2,64,5120],f16>, !torch.list<int> -> !torch.vtensor<[128,5120],f16> | |
%2776 = torch.aten.mm %2775, %2773 : !torch.vtensor<[128,5120],f16>, !torch.vtensor<[5120,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2777 = torch.aten.mul.Scalar %400, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2778 = torch.aten.add.Tensor %2777, %2776, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.int -> !torch.vtensor<[128,1280],f16> | |
%2779 = torch.aten.view %2778, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2780 = torch.aten.add.Tensor %2779, %2745, %int1 : !torch.vtensor<[2,64,1280],f16>, !torch.vtensor<[2,64,1280],f16>, !torch.int -> !torch.vtensor<[2,64,1280],f16> | |
%2781 = torch.aten.transpose.int %399, %int0, %int1 : !torch.vtensor<[1280,1280],f16>, !torch.int, !torch.int -> !torch.vtensor<[1280,1280],f16> | |
%2782 = torch.aten.view %2780, %2635 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[128,1280],f16> | |
%2783 = torch.aten.mm %2782, %2781 : !torch.vtensor<[128,1280],f16>, !torch.vtensor<[1280,1280],f16> -> !torch.vtensor<[128,1280],f16> | |
%2784 = torch.aten.mul.Scalar %398, %int1 : !torch.vtensor<[1280],f16>, !torch.int -> !torch.vtensor<[1280],f16> | |
%2785 = torch.aten.add.Tensor %2784, %2783, %int1 : !torch.vtensor<[1280],f16>, !torch.vtensor<[128,1280],f16>, !torch.int -> !torch.vtensor<[128,1280],f16> | |
%2786 = torch.aten.view %2785, %2033 : !torch.vtensor<[128,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,64,1280],f16> | |
%2787 = torch.prim.ListConstruct %int2, %int8, %int8, %int1280 : (!torch.int, !torch.int, !torch.int, !torch.int) -> !torch.list<int> | |
%2788 = torch.aten.view %2786, %2787 : !torch.vtensor<[2,64,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,8,8,1280],f16> | |
%2789 = torch.aten.permute %2788, %1003 : !torch.vtensor<[2,8,8,1280],f16>, !torch.list<int> -> !torch.vtensor<[2,1280,8,8],f16> | |
%2790 = torch.aten.clone %2789, %int0 : !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtensor<[2,1280,8,8],f16> | |
%2791 = torch.aten.add.Tensor %2790, %2583, %int1 : !torch.vtensor<[2,1280,8,8],f16>, !torch.vtensor<[2,1280,8,8],f16>, !torch.int -> !torch.vtenso |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment