Created April 22, 2022 16:56
/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:16: TracerWarning: Converting a tensor to a Python float might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  / math.sqrt(query.size(-1))
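[Annotation] The warning points at the scaled dot-product in bert_pytorch's attention/single.py. A minimal sketch of the flagged pattern, plus one possible trace-friendlier rewrite (the function names below are mine, not bert_pytorch's; whether the rewrite matters depends on whether shapes must stay dynamic):

import math
import torch

def scores_constant_scale(query: torch.Tensor, key: torch.Tensor) -> torch.Tensor:
    # The flagged pattern: under torch.jit.trace, query.size(-1) flows
    # through math.sqrt() as a plain Python float, so the scale is baked
    # into the graph as a constant and will not track other input shapes.
    return torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))

def scores_tensor_scale(query: torch.Tensor, key: torch.Tensor) -> torch.Tensor:
    # Keeping the scale as a tensor avoids the Python-float conversion the
    # tracer warns about here; tracing may still specialize on the shape,
    # so torch.jit.script is the more robust option for shape-polymorphic code.
    d_k = torch.tensor(query.size(-1), dtype=query.dtype, device=query.device)
    return torch.matmul(query, key.transpose(-2, -1)) / torch.sqrt(d_k)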
/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/jit/_trace.py:983: TracerWarning: Output nr 1. of the traced function does not match the corresponding output of the Python function. Detailed error:
Tensor-likes are not close!
Mismatched elements: 97297 / 98304 (99.0%)
Greatest absolute difference: 24.81045150756836 at index (0, 71, 4) (up to 1e-05 allowed)
Greatest relative difference: inf at index (0, 0, 5) (up to 1e-05 allowed)
  _check_trace(
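[Annotation] A 99% output mismatch between the trace run and the check run, followed by graphs differing across invocations, is what you would expect if the model was traced with dropout still active (train mode). That is my reading of this log, not something the log states. A self-contained sketch of the failure mode and the usual remedy:

import torch

class Toy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(4, 4)
        self.dropout = torch.nn.Dropout(p=0.5)

    def forward(self, x):
        return self.dropout(self.linear(x))

x = torch.randn(2, 4)

m = Toy()              # train mode: dropout is active and stochastic
torch.jit.trace(m, x)  # expect a TracerWarning: the traced outputs do not
                       # match the check run, like the mismatch reported above

m.eval()               # dropout becomes the identity in inference mode
torch.jit.trace(m, x)  # traces cleanly; the sanity check passes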
Traceback (most recent call last):
  File "/home/prashant/dSHARK/shark/examples/bert_torch.py", line 21, in <module>
    shark_module = SharkInference(
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/shark/shark_runner.py", line 105, in __init__
    self.shark_runner = SharkRunner(self.model, self.input, dynamic, device,
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/shark/shark_runner.py", line 62, in __init__
    self.torch_mlir_module = get_torch_mlir_module(model, input, dynamic,
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/shark/torch_mlir_utils.py", line 114, in get_torch_mlir_module
    module = shark_jit_trace(module, input, dynamic, tracing_required)
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/shark/torch_mlir_utils.py", line 77, in shark_jit_trace
    traced_module = torch.jit.trace_module(module, {"forward": input})
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/jit/_trace.py", line 983, in trace_module
    _check_trace(
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/jit/_trace.py", line 526, in _check_trace
    raise TracingCheckError(*diag_info)
torch.jit._trace.TracingCheckError: Tracing failed sanity checks!
ERROR: Graphs differed across invocations!
Graph diff:
graph(%self.1 : __torch__.BERT_torch,
%tokens : Tensor):
%model : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model)
%_11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks)
%dropout : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_11)
+ %model.505 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.503 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.505)
+ %_11.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.503)
+ %feed_forward : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.39)
+ %w_2 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward)
%model.503 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.501 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.503)
- %_11.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.501)
? ^
+ %_11.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.501)
? ^
- %feed_forward : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.39)
? ^
+ %feed_forward.93 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.37)
? +++ ^
- %w_2 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward)
? ^^^ ^^^^^ ^^^^^ ^^^
+ %dropout.143 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.93)
? ^^^^^^^^^^^ ^ +++++ ^ +++++ ^^^^^^^ +++
%model.501 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.499 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.501)
- %_11.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.499)
? ^
+ %_11.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.499)
? ^
- %feed_forward.93 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.37)
? ^ ^
+ %feed_forward.91 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.35)
? ^ ^
- %dropout.143 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.93)
+ %activation : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.91)
%model.499 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.497 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.499)
- %_11.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.497)
? ^
+ %_11.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.497)
? ^
- %feed_forward.91 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.35)
? - ^
+ %feed_forward.89 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.33)
? + ^
- %activation : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.91)
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ -
+ %w_1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.89)
? ^^^ ++++ ^^^ ^^ ^ ^^^ +
%model.497 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.495 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.497)
- %_11.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.495)
? ^
+ %_11.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.495)
? ^
+ %output_sublayer : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_11.31)
- %feed_forward.89 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_11.33)
- %w_1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.89)
%model.495 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.493 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.495)
- %_11.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.493)
- %output_sublayer : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_11.31)
- %model.493 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.491 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.493)
- %_11.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.491)
? ^
+ %_11.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.493)
? ^
%lambda_module : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.29)
%attention : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module)
%output_linear : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention)
+ %model.493 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.491 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.493)
+ %_11.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.491)
+ %lambda_module.143 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.27)
+ %attention.357 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.143)
+ %dropout.137 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.357)
+ %dropout.139 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.137)
%model.491 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.489 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.491)
- %_11.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.489)
? ^
+ %_11.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.489)
? ^
- %lambda_module.141 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.27)
? ^
+ %lambda_module.141 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.25)
? ^
- %attention.357 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.141)
? ^
+ %attention.353 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.141)
? ^
+ %attention.355 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.353)
- %dropout.137 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.357)
- %dropout.139 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.137)
%model.489 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.487 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.489)
- %_11.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.487)
? ^
+ %_11.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.487)
? ^
- %lambda_module.139 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.25)
? ^
+ %lambda_module.139 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.23)
? ^
- %attention.353 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.139)
? ^
+ %attention.351 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.139)
? ^
- %attention.355 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.353)
+ %linear_layers : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.351)
+ %_2 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers)
%model.487 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.485 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.487)
- %_11.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.485)
? ^
+ %_11.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.485)
? ^
- %lambda_module.137 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.23)
? ^
+ %lambda_module.137 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.21)
? ^
- %attention.351 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.137)
? ^^
+ %attention.349 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.137)
? ^^
- %linear_layers : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.351)
? ^^
+ %linear_layers.213 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.349)
? ++++ ^^
- %_2 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers)
? ^ ^
+ %_1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.213)
? ^ ^ ++++
%model.485 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.483 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.485)
- %_11.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.483)
? -
+ %_11.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.483)
? +
- %lambda_module.135 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.21)
? -
+ %lambda_module.135 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.19)
? +
- %attention.349 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.135)
? ^
+ %attention.347 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.135)
? ^
- %linear_layers.213 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.349)
? ^ ^
+ %linear_layers.211 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.347)
? ^ ^
- %_1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.213)
? ^ ^ ^
+ %_0 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.211)
? ^ ^ ^
%model.483 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.481 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.483)
- %_11.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.481)
? ^
+ %_11.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.481)
? ^
+ %input_sublayer : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_11.17)
- %lambda_module.133 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_11.19)
- %attention.347 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.133)
- %linear_layers.211 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.347)
- %_0 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.211)
%model.481 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.479 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.481)
- %_11.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.479)
? ^^^^ ^
+ %_10 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.479)
? ^ ^
- %input_sublayer : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_11.17)
+ %dropout.133 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_10)
%model.479 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.477 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.479)
- %_10 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.477)
+ %_10.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.477)
? +++
- %dropout.133 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_10)
+ %feed_forward.87 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.39)
+ %w_2.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.87)
%model.477 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.475 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.477)
- %_10.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.475)
? ^
+ %_10.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.475)
? ^
- %feed_forward.87 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.39)
? ^ ^
+ %feed_forward.85 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.37)
? ^ ^
- %w_2.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.87)
? ^^^ - ^^^^^ ^^^^^ ^^^ ^
+ %dropout.131 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.85)
? ^^^^^^^ ++ ^ +++++ ^ +++++ ^^^^^^^ ^
%model.475 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.473 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.475)
- %_10.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.473)
? ^
+ %_10.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.473)
? ^
- %feed_forward.85 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.37)
? ^ ^
+ %feed_forward.83 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.35)
? ^ ^
- %dropout.131 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.85)
+ %activation.21 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.83)
%model.473 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.471 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.473)
- %_10.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.471)
? ^
+ %_10.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.471)
? ^
- %feed_forward.83 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.35)
? ^ ^
+ %feed_forward.81 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.33)
? ^ ^
- %activation.21 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.83)
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^
+ %w_1.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.81)
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^
%model.471 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.469 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.471)
- %_10.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.469)
? ^
+ %_10.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.469)
? ^
+ %output_sublayer.21 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_10.31)
- %feed_forward.81 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_10.33)
- %w_1.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.81)
%model.469 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.467 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.469)
- %_10.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.467)
? ^^
+ %_10.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.467)
? ^^
- %output_sublayer.21 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_10.31)
+ %lambda_module.133 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.29)
+ %attention.345 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.133)
+ %output_linear.69 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.345)
%model.467 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.465 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.467)
- %_10.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.465)
? ^
+ %_10.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.465)
? ^
- %lambda_module.131 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.29)
? ^
+ %lambda_module.131 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.27)
? ^
- %attention.345 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.131)
? ^
+ %attention.343 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.131)
? ^
- %output_linear.69 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.345)
+ %dropout.125 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.343)
+ %dropout.127 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.125)
%model.465 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.463 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.465)
- %_10.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.463)
? ^
+ %_10.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.463)
? ^
- %lambda_module.129 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.27)
? ^
+ %lambda_module.129 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.25)
? ^
- %attention.343 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.129)
? -
+ %attention.339 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.129)
? +
+ %attention.341 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.339)
- %dropout.125 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.343)
- %dropout.127 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.125)
%model.463 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.461 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.463)
- %_10.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.461)
? ^
+ %_10.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.461)
? ^
- %lambda_module.127 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.25)
? ^
+ %lambda_module.127 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.23)
? ^
- %attention.339 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.127)
? ^
+ %attention.337 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.127)
? ^
- %attention.341 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.339)
+ %linear_layers.209 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.337)
+ %_2.111 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.209)
%model.461 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.459 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.461)
- %_10.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.459)
? ^
+ %_10.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.459)
? ^
- %lambda_module.125 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.23)
? ^
+ %lambda_module.125 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.21)
? ^
- %attention.337 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.125)
? ^
+ %attention.335 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.125)
? ^
- %linear_layers.209 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.337)
? ^ ^
+ %linear_layers.207 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.335)
? ^ ^
- %_2.111 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.209)
? ^ ^ ^
+ %_1.111 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.207)
? ^ ^ ^
%model.459 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.457 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.459)
- %_10.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.457)
? -
+ %_10.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.457)
? +
- %lambda_module.123 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.21)
? -
+ %lambda_module.123 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.19)
? +
- %attention.335 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.123)
? ^
+ %attention.333 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.123)
? ^
- %linear_layers.207 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.335)
? ^ ^
+ %linear_layers.205 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.333)
? ^ ^
- %_1.111 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.207)
? ^ ^ ^ ^
+ %_0.113 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.205)
? ^ ^ ^ ^
%model.457 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.455 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.457)
- %_10.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.455)
? ^
+ %_10.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.455)
? ^
+ %input_sublayer.21 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_10.17)
- %lambda_module.121 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_10.19)
- %attention.333 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.121)
- %linear_layers.205 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.333)
- %_0.111 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.205)
%model.455 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.453 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.455)
- %_10.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.453)
? ^^^^^ ^^
+ %_9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.453)
? ^ ^
- %input_sublayer.21 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_10.17)
+ %dropout.121 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_9)
%model.453 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.451 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.453)
- %_9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.451)
+ %_9.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.451)
? +++
- %dropout.121 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_9)
+ %feed_forward.79 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.39)
+ %w_2.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.79)
%model.451 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.449 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.451)
- %_9.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.449)
? ^
+ %_9.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.449)
? ^
- %feed_forward.79 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.39)
? ^ ^
+ %feed_forward.77 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.37)
? ^ ^
- %w_2.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.79)
? ^^^ ^^^^^ ^^^^^ ^^^ ^
+ %dropout.119 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.77)
? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^
%model.449 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.447 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.449)
- %_9.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.447)
? ^
+ %_9.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.447)
? ^
- %feed_forward.77 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.37)
? ^ ^
+ %feed_forward.75 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.35)
? ^ ^
- %dropout.119 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.77)
+ %activation.19 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.75)
%model.447 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.445 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.447)
- %_9.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.445)
? ^
+ %_9.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.445)
? ^
- %feed_forward.75 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.35)
? ^ ^
+ %feed_forward.73 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.33)
? ^ ^
- %activation.19 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.75)
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^
+ %w_1.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.73)
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^
%model.445 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.443 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.445)
- %_9.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.443)
? ^
+ %_9.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.443)
? ^
+ %output_sublayer.19 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_9.31)
- %feed_forward.73 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_9.33)
- %w_1.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.73)
%model.443 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.441 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.443)
- %_9.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.441)
? ^^
+ %_9.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.441)
? ^^
- %output_sublayer.19 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_9.31)
+ %lambda_module.121 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.29)
+ %attention.331 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.121)
+ %output_linear.67 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.331)
%model.441 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.439 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.441)
- %_9.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.439)
? ^
+ %_9.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.439)
? ^
- %lambda_module.119 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.29)
? ^
+ %lambda_module.119 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.27)
? ^
- %attention.331 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.119)
? ^^
+ %attention.329 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.119)
? ^^
- %output_linear.67 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.331)
+ %dropout.113 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.329)
+ %dropout.115 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.113)
%model.439 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.437 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.439)
- %_9.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.437)
? ^
+ %_9.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.437)
? ^
- %lambda_module.117 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.27)
? ^
+ %lambda_module.117 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.25)
? ^
- %attention.329 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.117)
? ^
+ %attention.325 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.117)
? ^
+ %attention.327 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.325)
- %dropout.113 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.329)
- %dropout.115 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.113)
%model.437 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.435 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.437)
- %_9.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.435)
? ^
+ %_9.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.435)
? ^
- %lambda_module.115 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.25)
? ^
+ %lambda_module.115 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.23)
? ^
- %attention.325 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.115)
? ^
+ %attention.323 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.115)
? ^
- %attention.327 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.325)
+ %linear_layers.203 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.323)
+ %_2.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.203)
%model.435 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.433 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.435)
- %_9.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.433)
? ^
+ %_9.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.433)
? ^
- %lambda_module.113 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.23)
? ^
+ %lambda_module.113 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.21)
? ^
- %attention.323 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.113)
? ^
+ %attention.321 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.113)
? ^
- %linear_layers.203 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.323)
? ^ ^
+ %linear_layers.201 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.321)
? ^ ^
- %_2.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.203)
? ^ ^ ^
+ %_1.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.201)
? ^ ^ ^
%model.433 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.431 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.433)
- %_9.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.431)
? -
+ %_9.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.431)
? +
- %lambda_module.111 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.21)
? -
+ %lambda_module.111 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.19)
? +
- %attention.321 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.111)
? -
+ %attention.319 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.111)
? +
- %linear_layers.201 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.321)
? -- -
+ %linear_layers.199 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.319)
? ++ +
- %_1.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.201)
? ^ ^^ ^ --
+ %_0.111 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.199)
? ^ ^^ ^ ++
%model.431 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.429 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.431)
- %_9.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.429)
? ^
+ %_9.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.429)
? ^
+ %input_sublayer.19 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_9.17)
- %lambda_module.109 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_9.19)
- %attention.319 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.109)
- %linear_layers.199 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.319)
- %_0.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.199)
%model.429 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.427 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.429)
- %_9.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.427)
? ^^^^ ^
+ %_8 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.427)
? ^ ^
- %input_sublayer.19 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_9.17)
+ %dropout.109 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_8)
%model.427 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.425 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.427)
- %_8 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.425)
+ %_8.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.425)
? +++
- %dropout.109 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_8)
+ %feed_forward.71 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.39)
+ %w_2.17 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.71)
%model.425 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.423 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.425)
- %_8.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.423)
? ^
+ %_8.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.423)
? ^
- %feed_forward.71 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.39)
? ^^ ^
+ %feed_forward.69 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.37)
? ^^ ^
- %w_2.17 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.71)
? ^^^ ^^^^^ ^^^^^ ^^^ ^^
+ %dropout.107 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.69)
? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^^
%model.423 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.421 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.423)
- %_8.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.421)
? ^
+ %_8.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.421)
? ^
- %feed_forward.69 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.37)
? ^ ^
+ %feed_forward.67 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.35)
? ^ ^
- %dropout.107 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.69)
+ %activation.17 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.67)
%model.421 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.419 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.421)
- %_8.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.419)
? ^
+ %_8.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.419)
? ^
- %feed_forward.67 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.35)
? ^ ^
+ %feed_forward.65 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.33)
? ^ ^
- %activation.17 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.67)
? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^
+ %w_1.17 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.65)
? ^^^ ++++ ^^^ ^^ ^ ^^^ ^
%model.419 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.417 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.419)
- %_8.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.417)
? ^
+ %_8.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.417)
? ^
+ %output_sublayer.17 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_8.31)
- %feed_forward.65 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_8.33)
- %w_1.17 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.65)
%model.417 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.415 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.417)
- %_8.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.415)
? ^^
+ %_8.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.415)
? ^^
- %output_sublayer.17 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_8.31)
+ %lambda_module.109 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.29)
+ %attention.317 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.109)
+ %output_linear.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.317)
%model.415 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.413 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.415)
- %_8.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.413)
? ^
+ %_8.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.413)
? ^
- %lambda_module.107 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.29)
? ^
+ %lambda_module.107 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.27)
? ^
- %attention.317 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.107)
? ^
+ %attention.315 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.107)
? ^
- %output_linear.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.317)
+ %dropout.101 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.315)
+ %dropout.103 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.101)
%model.413 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.411 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.413)
- %_8.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.411)
? ^
+ %_8.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.411)
? ^
- %lambda_module.105 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.27)
? ^
+ %lambda_module.105 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.25)
? ^
- %attention.315 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.105)
? ^
+ %attention.311 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.105)
? ^
+ %attention.313 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.311)
- %dropout.101 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.315)
- %dropout.103 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.101)
%model.411 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.409 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.411)
- %_8.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.409)
? ^
+ %_8.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.409)
? ^
- %lambda_module.103 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.25)
? ^
+ %lambda_module.103 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.23)
? ^
- %attention.311 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.103)
? ^^
+ %attention.309 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.103)
? ^^
- %attention.313 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.311)
+ %linear_layers.197 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.309)
+ %_2.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.197)
%model.409 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
| %transformer_blocks.407 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.409) | |
| - %_8.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.407) | |
| ? ^ | |
| + %_8.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.407) | |
| ? ^ | |
| - %lambda_module.101 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.23) | |
| ? ^ | |
| + %lambda_module.101 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.21) | |
| ? ^ | |
| - %attention.309 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.101) | |
| ? ^ | |
| + %attention.307 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.101) | |
| ? ^ | |
| - %linear_layers.197 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.309) | |
| ? ^ ^ | |
| + %linear_layers.195 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.307) | |
| ? ^ ^ | |
| - %_2.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.197) | |
| ? ^ ^ ^ | |
| + %_1.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.195) | |
| ? ^ ^ ^ | |
| %model.407 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.405 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.407) | |
| - %_8.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.405) | |
| ? - | |
| + %_8.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.405) | |
| ? + | |
| - %lambda_module.99 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.21) | |
| ? - | |
| + %lambda_module.99 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.19) | |
| ? + | |
| - %attention.307 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.99) | |
| ? ^ | |
| + %attention.305 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.99) | |
| ? ^ | |
| - %linear_layers.195 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.307) | |
| ? ^ ^ | |
| + %linear_layers.193 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.305) | |
| ? ^ ^ | |
| - %_1.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.195) | |
| ? ^ ^ ^ ^ | |
| + %_0.109 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.193) | |
| ? ^ ^ ^ ^ | |
| %model.405 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.403 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.405) | |
| - %_8.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.403) | |
| ? ^ | |
| + %_8.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.403) | |
| ? ^ | |
| + %input_sublayer.17 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_8.17) | |
| - %lambda_module.97 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_8.19) | |
| - %attention.305 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.97) | |
| - %linear_layers.193 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.305) | |
| - %_0.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.193) | |
| %model.403 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.401 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.403) | |
| - %_8.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.401) | |
| ? --- ^ | |
| + %_7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.401) | |
| ? ^ | |
| - %input_sublayer.17 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_8.17) | |
| + %dropout.97 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_7) | |
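The stanza pattern that just repeated for transformer block 8, and continues below for blocks 7, 6, 5, and so on, is the two trace invocations reaching different submodule attributes at the same program point (`dropout` in one graph, `feed_forward` or `attention` in the other), so the comparison keeps drifting out of alignment. A common trigger for this kind of run-to-run trace divergence is tracing a model that was left in training mode, where dropout is stochastic. A minimal sketch of the usual remedy, assuming a toy module rather than the gist's bert_pytorch model:

```python
import torch

# Toy stand-in (not the gist's BERT): a module with live dropout.
model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Dropout(p=0.5))
example = torch.randn(2, 8)

# In training mode, the repeated runs torch.jit.trace performs for its
# sanity check disagree because the dropout mask differs on every call.
# eval() makes dropout the identity, so the runs agree and the check passes.
model.eval()
traced = torch.jit.trace(model, example)
```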
| %model.401 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.399 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.401) | |
| - %_7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.399) | |
| + %_7.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.399) | |
| ? +++ | |
| - %dropout.97 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_7) | |
| + %feed_forward.63 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.39) | |
| + %w_2.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.63) | |
| %model.399 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.397 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.399) | |
| - %_7.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.397) | |
| ? ^ | |
| + %_7.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.397) | |
| ? ^ | |
| - %feed_forward.63 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.39) | |
| ? ^ ^ | |
| + %feed_forward.61 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.37) | |
| ? ^ ^ | |
| - %w_2.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.63) | |
| ? ^^^ ^ ^^^^^ ^^^^^ ^^^ ^ | |
| + %dropout.95 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.61) | |
| ? ^^^^^^^ ^ ^ +++++ ^ +++++ ^^^^^^^ ^ | |
| %model.397 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.395 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.397) | |
| - %_7.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.395) | |
| ? ^ | |
| + %_7.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.395) | |
| ? ^ | |
| - %feed_forward.61 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.37) | |
| ? ^^ ^ | |
| + %feed_forward.59 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.35) | |
| ? ^^ ^ | |
| - %dropout.95 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.61) | |
| + %activation.15 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.59) | |
| %model.395 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.393 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.395) | |
| - %_7.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.393) | |
| ? ^ | |
| + %_7.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.393) | |
| ? ^ | |
| - %feed_forward.59 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.35) | |
| ? ^ ^ | |
| + %feed_forward.57 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.33) | |
| ? ^ ^ | |
| - %activation.15 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.59) | |
| ? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
| + %w_1.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.57) | |
| ? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
| %model.393 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.391 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.393) | |
| - %_7.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.391) | |
| ? ^ | |
| + %_7.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.391) | |
| ? ^ | |
| + %output_sublayer.15 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_7.31) | |
| - %feed_forward.57 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_7.33) | |
| - %w_1.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.57) | |
| %model.391 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.389 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.391) | |
| - %_7.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.389) | |
| ? ^^ | |
| + %_7.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.389) | |
| ? ^^ | |
| - %output_sublayer.15 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_7.31) | |
| + %lambda_module.97 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.29) | |
| + %attention.303 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.97) | |
| + %output_linear.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.303) | |
| %model.389 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.387 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.389) | |
| - %_7.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.387) | |
| ? ^ | |
| + %_7.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.387) | |
| ? ^ | |
| - %lambda_module.95 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.29) | |
| ? ^ | |
| + %lambda_module.95 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.27) | |
| ? ^ | |
| - %attention.303 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.95) | |
| ? ^ | |
| + %attention.301 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.95) | |
| ? ^ | |
| - %output_linear.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.303) | |
| + %dropout.89 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.301) | |
| + %dropout.91 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.89) | |
| %model.387 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.385 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.387) | |
| - %_7.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.385) | |
| ? ^ | |
| + %_7.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.385) | |
| ? ^ | |
| - %lambda_module.93 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.27) | |
| ? ^ | |
| + %lambda_module.93 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.25) | |
| ? ^ | |
| - %attention.301 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.93) | |
| ? ^^^ | |
| + %attention.297 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.93) | |
| ? ^^^ | |
| + %attention.299 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.297) | |
| - %dropout.89 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.301) | |
| - %dropout.91 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.89) | |
| %model.385 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.383 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.385) | |
| - %_7.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.383) | |
| ? ^ | |
| + %_7.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.383) | |
| ? ^ | |
| - %lambda_module.91 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.25) | |
| ? ^ | |
| + %lambda_module.91 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.23) | |
| ? ^ | |
| - %attention.297 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.91) | |
| ? ^ | |
| + %attention.295 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.91) | |
| ? ^ | |
| - %attention.299 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.297) | |
| + %linear_layers.191 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.295) | |
| + %_2.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.191) | |
| %model.383 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.381 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.383) | |
| - %_7.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.381) | |
| ? ^ | |
| + %_7.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.381) | |
| ? ^ | |
| - %lambda_module.89 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.23) | |
| ? ^ | |
| + %lambda_module.89 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.21) | |
| ? ^ | |
| - %attention.295 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.89) | |
| ? ^ | |
| + %attention.293 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.89) | |
| ? ^ | |
| - %linear_layers.191 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.295) | |
| ? - ^ | |
| + %linear_layers.189 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.293) | |
| ? + ^ | |
| - %_2.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.191) | |
| ? ^ ^ - | |
| + %_1.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.189) | |
| ? ^ ^ + | |
| %model.381 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.379 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.381) | |
| - %_7.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.379) | |
| ? - | |
| + %_7.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.379) | |
| ? + | |
| - %lambda_module.87 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.21) | |
| ? - | |
| + %lambda_module.87 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.19) | |
| ? + | |
| - %attention.293 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.87) | |
| ? ^ | |
| + %attention.291 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.87) | |
| ? ^ | |
| - %linear_layers.189 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.293) | |
| ? ^ ^ | |
| + %linear_layers.187 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.291) | |
| ? ^ ^ | |
| - %_1.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.189) | |
| ? ^ ^ ^ ^ | |
| + %_0.107 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.187) | |
| ? ^ ^ ^ ^ | |
| %model.379 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.377 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.379) | |
| - %_7.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.377) | |
| ? ^ | |
| + %_7.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.377) | |
| ? ^ | |
| + %input_sublayer.15 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_7.17) | |
| - %lambda_module.85 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_7.19) | |
| - %attention.291 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.85) | |
| - %linear_layers.187 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.291) | |
| - %_0.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.187) | |
| %model.377 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.375 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.377) | |
| - %_7.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.375) | |
| ? ^^^^ ^ | |
| + %_6 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.375) | |
| ? ^ ^ | |
| - %input_sublayer.15 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_7.17) | |
| + %dropout.85 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_6) | |
| %model.375 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.373 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.375) | |
| - %_6 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.373) | |
| + %_6.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.373) | |
| ? +++ | |
| - %dropout.85 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_6) | |
| + %feed_forward.55 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.39) | |
| + %w_2.13 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.55) | |
| %model.373 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.371 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.373) | |
| - %_6.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.371) | |
| ? ^ | |
| + %_6.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.371) | |
| ? ^ | |
| - %feed_forward.55 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.39) | |
| ? ^ ^ | |
| + %feed_forward.53 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.37) | |
| ? ^ ^ | |
| - %w_2.13 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.55) | |
| ? ^^^ ^ ^^^^^ ^^^^^ ^^^ ^ | |
| + %dropout.83 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.53) | |
| ? ^^^^^^^ ^ ^ +++++ ^ +++++ ^^^^^^^ ^ | |
| %model.371 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.369 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.371) | |
| - %_6.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.369) | |
| ? ^ | |
| + %_6.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.369) | |
| ? ^ | |
| - %feed_forward.53 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.37) | |
| ? ^ ^ | |
| + %feed_forward.51 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.35) | |
| ? ^ ^ | |
| - %dropout.83 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.53) | |
| + %activation.13 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.51) | |
| %model.369 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.367 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.369) | |
| - %_6.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.367) | |
| ? ^ | |
| + %_6.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.367) | |
| ? ^ | |
| - %feed_forward.51 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.35) | |
| ? ^^ ^ | |
| + %feed_forward.49 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.33) | |
| ? ^^ ^ | |
| - %activation.13 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.51) | |
| ? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^^ | |
| + %w_1.13 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.49) | |
| ? ^^^ ++++ ^^^ ^^ ^ ^^^ ^^ | |
| %model.367 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.365 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.367) | |
| - %_6.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.365) | |
| ? ^ | |
| + %_6.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.365) | |
| ? ^ | |
| + %output_sublayer.13 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_6.31) | |
| - %feed_forward.49 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_6.33) | |
| - %w_1.13 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.49) | |
| %model.365 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.363 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.365) | |
| - %_6.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.363) | |
| ? ^^ | |
| + %_6.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.363) | |
| ? ^^ | |
| - %output_sublayer.13 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_6.31) | |
| + %lambda_module.85 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.29) | |
| + %attention.289 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.85) | |
| + %output_linear.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.289) | |
| %model.363 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.361 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.363) | |
| - %_6.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.361) | |
| ? ^ | |
| + %_6.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.361) | |
| ? ^ | |
| - %lambda_module.83 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.29) | |
| ? ^ | |
| + %lambda_module.83 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.27) | |
| ? ^ | |
| - %attention.289 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.83) | |
| ? ^ | |
| + %attention.287 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.83) | |
| ? ^ | |
| - %output_linear.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.289) | |
| + %dropout.77 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.287) | |
| + %dropout.79 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.77) | |
| %model.361 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.359 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.361) | |
| - %_6.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.359) | |
| ? ^ | |
| + %_6.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.359) | |
| ? ^ | |
| - %lambda_module.81 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.27) | |
| ? ^ | |
| + %lambda_module.81 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.25) | |
| ? ^ | |
| - %attention.287 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.81) | |
| ? ^ | |
| + %attention.283 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.81) | |
| ? ^ | |
| + %attention.285 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.283) | |
| - %dropout.77 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.287) | |
| - %dropout.79 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.77) | |
| %model.359 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.357 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.359) | |
| - %_6.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.357) | |
| ? ^ | |
| + %_6.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.357) | |
| ? ^ | |
| - %lambda_module.79 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.25) | |
| ? ^ | |
| + %lambda_module.79 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.23) | |
| ? ^ | |
| - %attention.283 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.79) | |
| ? ^ | |
| + %attention.281 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.79) | |
| ? ^ | |
| - %attention.285 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.283) | |
| + %linear_layers.185 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.281) | |
| + %_2.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.185) | |
| %model.357 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.355 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.357) | |
| - %_6.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.355) | |
| ? ^ | |
| + %_6.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.355) | |
| ? ^ | |
| - %lambda_module.77 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.23) | |
| ? ^ | |
| + %lambda_module.77 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.21) | |
| ? ^ | |
| - %attention.281 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.77) | |
| ? ^^ | |
| + %attention.279 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.77) | |
| ? ^^ | |
| - %linear_layers.185 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.281) | |
| ? ^ ^^ | |
| + %linear_layers.183 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.279) | |
| ? ^ ^^ | |
| - %_2.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.185) | |
| ? ^ ^ ^ | |
| + %_1.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.183) | |
| ? ^ ^ ^ | |
| %model.355 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.353 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.355) | |
| - %_6.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.353) | |
| ? - | |
| + %_6.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.353) | |
| ? + | |
| - %lambda_module.75 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.21) | |
| ? - | |
| + %lambda_module.75 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.19) | |
| ? + | |
| - %attention.279 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.75) | |
| ? ^ | |
| + %attention.277 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.75) | |
| ? ^ | |
| - %linear_layers.183 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.279) | |
| ? ^ ^ | |
| + %linear_layers.181 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.277) | |
| ? ^ ^ | |
| - %_1.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.183) | |
| ? ^ ^ ^ ^ | |
| + %_0.105 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.181) | |
| ? ^ ^ ^ ^ | |
| %model.353 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.351 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.353) | |
| - %_6.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.351) | |
| ? ^ | |
| + %_6.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.351) | |
| ? ^ | |
| + %input_sublayer.13 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_6.17) | |
| - %lambda_module.73 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_6.19) | |
| - %attention.277 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.73) | |
| - %linear_layers.181 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.277) | |
| - %_0.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.181) | |
| %model.351 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.349 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.351) | |
| - %_6.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.349) | |
| ? ^^^^ ^ | |
| + %_5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.349) | |
| ? ^ ^ | |
| - %input_sublayer.13 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_6.17) | |
| + %dropout.73 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_5) | |
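If the nondeterminism is expected and only the sanity check is in the way, `torch.jit.trace` also accepts `check_trace=False`, which skips the re-run-and-compare step that produced this diff; note this silences the check rather than fixing its cause. Sketch under the same toy-module assumption as above:

```python
import torch

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Dropout(p=0.5))
example = torch.randn(2, 8)

# check_trace=False keeps the first trace without re-running it for
# comparison, so no graph diff / TracingCheckError is generated.
traced = torch.jit.trace(model, example, check_trace=False)
```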
| %model.349 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.347 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.349) | |
| - %_5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.347) | |
| + %_5.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.347) | |
| ? +++ | |
| - %dropout.73 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_5) | |
| + %feed_forward.47 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.39) | |
| + %w_2.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.47) | |
| %model.347 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.345 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.347) | |
| - %_5.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.345) | |
| ? ^ | |
| + %_5.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.345) | |
| ? ^ | |
| - %feed_forward.47 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.39) | |
| ? ^ ^ | |
| + %feed_forward.45 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.37) | |
| ? ^ ^ | |
| - %w_2.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.47) | |
| ? ^^^ - ^^^^^ ^^^^^ ^^^ ^ | |
| + %dropout.71 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.45) | |
| ? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^ | |
| %model.345 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.343 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.345) | |
| - %_5.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.343) | |
| ? ^ | |
| + %_5.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.343) | |
| ? ^ | |
| - %feed_forward.45 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.37) | |
| ? ^ ^ | |
| + %feed_forward.43 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.35) | |
| ? ^ ^ | |
| - %dropout.71 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.45) | |
| + %activation.11 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.43) | |
| %model.343 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.341 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.343) | |
| - %_5.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.341) | |
| ? ^ | |
| + %_5.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.341) | |
| ? ^ | |
| - %feed_forward.43 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.35) | |
| ? ^ ^ | |
| + %feed_forward.41 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.33) | |
| ? ^ ^ | |
| - %activation.11 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.43) | |
| ? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
| + %w_1.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.41) | |
| ? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
| %model.341 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.339 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.341) | |
| - %_5.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.339) | |
| ? ^ | |
| + %_5.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.339) | |
| ? ^ | |
| + %output_sublayer.11 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_5.31) | |
| - %feed_forward.41 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_5.33) | |
| - %w_1.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.41) | |
| %model.339 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.337 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.339) | |
| - %_5.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.337) | |
| ? ^^ | |
| + %_5.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.337) | |
| ? ^^ | |
| - %output_sublayer.11 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_5.31) | |
| + %lambda_module.73 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.29) | |
| + %attention.275 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.73) | |
| + %output_linear.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.275) | |
| %model.337 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.335 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.337) | |
| - %_5.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.335) | |
| ? ^ | |
| + %_5.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.335) | |
| ? ^ | |
| - %lambda_module.71 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.29) | |
| ? ^ | |
| + %lambda_module.71 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.27) | |
| ? ^ | |
| - %attention.275 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.71) | |
| ? ^ | |
| + %attention.273 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.71) | |
| ? ^ | |
| - %output_linear.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.275) | |
| + %dropout.65 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.273) | |
| + %dropout.67 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.65) | |
| %model.335 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.333 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.335) | |
| - %_5.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.333) | |
| ? ^ | |
| + %_5.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.333) | |
| ? ^ | |
| - %lambda_module.69 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.27) | |
| ? ^ | |
| + %lambda_module.69 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.25) | |
| ? ^ | |
| - %attention.273 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.69) | |
| ? ^^ | |
| + %attention.269 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.69) | |
| ? ^^ | |
| + %attention.271 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.269) | |
| - %dropout.65 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.273) | |
| - %dropout.67 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.65) | |
| %model.333 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.331 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.333) | |
| - %_5.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.331) | |
| ? ^ | |
| + %_5.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.331) | |
| ? ^ | |
| - %lambda_module.67 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.25) | |
| ? ^ | |
| + %lambda_module.67 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.23) | |
| ? ^ | |
| - %attention.269 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.67) | |
| ? ^ | |
| + %attention.267 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.67) | |
| ? ^ | |
| - %attention.271 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.269) | |
| + %linear_layers.179 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.267) | |
| + %_2.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.179) | |
| %model.331 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.329 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.331) | |
| - %_5.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.329) | |
| ? ^ | |
| + %_5.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.329) | |
| ? ^ | |
| - %lambda_module.65 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.23) | |
| ? ^ | |
| + %lambda_module.65 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.21) | |
| ? ^ | |
| - %attention.267 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.65) | |
| ? ^ | |
| + %attention.265 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.65) | |
| ? ^ | |
| - %linear_layers.179 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.267) | |
| ? ^ ^ | |
| + %linear_layers.177 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.265) | |
| ? ^ ^ | |
| - %_2.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.179) | |
| ? ^ ^ ^ | |
| + %_1.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.177) | |
| ? ^ ^ ^ | |
| %model.329 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.327 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.329) | |
| - %_5.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.327) | |
| ? - | |
| + %_5.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.327) | |
| ? + | |
| - %lambda_module.63 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.21) | |
| ? - | |
| + %lambda_module.63 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.19) | |
| ? + | |
| - %attention.265 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.63) | |
| ? ^ | |
| + %attention.263 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.63) | |
| ? ^ | |
| - %linear_layers.177 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.265) | |
| ? ^ ^ | |
| + %linear_layers.175 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.263) | |
| ? ^ ^ | |
| - %_1.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.177) | |
| ? ^ ^ ^ ^ | |
| + %_0.103 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.175) | |
| ? ^ ^ ^ ^ | |
| %model.327 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.325 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.327) | |
| - %_5.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.325) | |
| ? ^ | |
| + %_5.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.325) | |
| ? ^ | |
| + %input_sublayer.11 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_5.17) | |
| - %lambda_module.61 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_5.19) | |
| - %attention.263 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.61) | |
| - %linear_layers.175 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.263) | |
| - %_0.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.175) | |
| %model.325 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.323 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.325) | |
| - %_5.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.323) | |
| ? ^^^^ ^ | |
| + %_4 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.323) | |
| ? ^ ^ | |
| - %input_sublayer.11 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_5.17) | |
| + %dropout.61 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_4) | |
| %model.323 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.321 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.323) | |
| - %_4 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.321) | |
| + %_4.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.321) | |
| ? +++ | |
| - %dropout.61 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_4) | |
| + %feed_forward.39 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.39) | |
| + %w_2.9 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.39) | |
| %model.321 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.319 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.321) | |
| - %_4.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.319) | |
| ? ^ | |
| + %_4.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.319) | |
| ? ^ | |
| - %feed_forward.39 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.39) | |
| ? ^ ^ | |
| + %feed_forward.37 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.37) | |
| ? ^ ^ | |
| - %w_2.9 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.39) | |
| ? ^^^ ^^^^^ ^^^^^ ^^^ ^ | |
| + %dropout.59 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.37) | |
| ? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^ | |
| %model.319 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.317 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.319) | |
| - %_4.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.317) | |
| ? ^ | |
| + %_4.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.317) | |
| ? ^ | |
| - %feed_forward.37 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.37) | |
| ? ^ ^ | |
| + %feed_forward.35 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.35) | |
| ? ^ ^ | |
| - %dropout.59 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.37) | |
| + %activation.9 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.35) | |
| %model.317 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.315 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.317) | |
| - %_4.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.315) | |
| ? ^ | |
| + %_4.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.315) | |
| ? ^ | |
| - %feed_forward.35 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.35) | |
| ? ^ ^ | |
| + %feed_forward.33 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.33) | |
| ? ^ ^ | |
| - %activation.9 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.35) | |
| ? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
| + %w_1.9 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.33) | |
| ? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
| %model.315 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.313 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.315) | |
| - %_4.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.313) | |
| ? ^ | |
| + %_4.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.313) | |
| ? ^ | |
| + %output_sublayer.9 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_4.31) | |
| - %feed_forward.33 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_4.33) | |
| - %w_1.9 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.33) | |
| %model.313 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.311 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.313) | |
| - %_4.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.311) | |
| ? ^^ | |
| + %_4.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.311) | |
| ? ^^ | |
| - %output_sublayer.9 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_4.31) | |
| + %lambda_module.61 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.29) | |
| + %attention.261 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.61) | |
| + %output_linear.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.261) | |
| %model.311 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.309 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.311) | |
| - %_4.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.309) | |
| ? ^ | |
| + %_4.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.309) | |
| ? ^ | |
| - %lambda_module.59 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.29) | |
| ? ^ | |
| + %lambda_module.59 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.27) | |
| ? ^ | |
| - %attention.261 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.59) | |
| ? ^^ | |
| + %attention.259 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.59) | |
| ? ^^ | |
| - %output_linear.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.261) | |
| + %dropout.53 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.259) | |
| + %dropout.55 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.53) | |
| %model.309 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.307 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.309) | |
| - %_4.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.307) | |
| ? ^ | |
| + %_4.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.307) | |
| ? ^ | |
| - %lambda_module.57 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.27) | |
| ? ^ | |
| + %lambda_module.57 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.25) | |
| ? ^ | |
| - %attention.259 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.57) | |
| ? ^ | |
| + %attention.255 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.57) | |
| ? ^ | |
| + %attention.257 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.255) | |
| - %dropout.53 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.259) | |
| - %dropout.55 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.53) | |
| %model.307 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.305 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.307) | |
| - %_4.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.305) | |
| ? ^ | |
| + %_4.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.305) | |
| ? ^ | |
| - %lambda_module.55 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.25) | |
| ? ^ | |
| + %lambda_module.55 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.23) | |
| ? ^ | |
| - %attention.255 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.55) | |
| ? ^ | |
| + %attention.253 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.55) | |
| ? ^ | |
| - %attention.257 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.255) | |
| + %linear_layers.173 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.253) | |
| + %_2.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.173) | |
| %model.305 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.303 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.305) | |
| - %_4.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.303) | |
| ? ^ | |
| + %_4.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.303) | |
| ? ^ | |
| - %lambda_module.53 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.23) | |
| ? ^ | |
| + %lambda_module.53 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.21) | |
| ? ^ | |
| - %attention.253 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.53) | |
| ? ^ | |
| + %attention.251 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.53) | |
| ? ^ | |
| - %linear_layers.173 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.253) | |
| ? ^ ^ | |
| + %linear_layers.171 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.251) | |
| ? ^ ^ | |
| - %_2.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.173) | |
| ? ^ ^ ^ | |
| + %_1.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.171) | |
| ? ^ ^ ^ | |
| %model.303 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.301 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.303) | |
| - %_4.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.301) | |
| ? - | |
| + %_4.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.301) | |
| ? + | |
| - %lambda_module.51 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.21) | |
| ? - | |
| + %lambda_module.51 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.19) | |
| ? + | |
| - %attention.251 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.51) | |
| ? ^^ | |
| + %attention.249 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.51) | |
| ? ^^ | |
| - %linear_layers.171 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.251) | |
| ? ^^ ^^ | |
| + %linear_layers.169 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.249) | |
| ? ^^ ^^ | |
| - %_1.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.171) | |
| ? ^^^ ^ ^^ | |
| + %_0.101 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.169) | |
| ? ++ ^^ ^ ^^ | |
| %model.301 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.299 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.301) | |
| - %_4.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.299) | |
| ? ^ | |
| + %_4.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.299) | |
| ? ^ | |
| + %input_sublayer.9 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_4.17) | |
| - %lambda_module.49 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_4.19) | |
| - %attention.249 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.49) | |
| - %linear_layers.169 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.249) | |
| - %_0.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.169) | |
| %model.299 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.297 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.299) | |
| - %_4.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.297) | |
| ? ^^^^ ^ | |
| + %_3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.297) | |
| ? ^ ^ | |
| - %input_sublayer.9 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_4.17) | |
| + %dropout.49 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_3) | |
| %model.297 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.295 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.297) | |
| - %_3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.295) | |
| + %_3.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.295) | |
| ? +++ | |
| - %dropout.49 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_3) | |
| + %feed_forward.31 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.39) | |
| + %w_2.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.31) | |
| %model.295 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.293 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.295) | |
| - %_3.39 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.293) | |
| ? ^ | |
| + %_3.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.293) | |
| ? ^ | |
| - %feed_forward.31 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.39) | |
| ? ^^ ^ | |
| + %feed_forward.29 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.37) | |
| ? ^^ ^ | |
| - %w_2.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.31) | |
| ? ^^^ ^^^^^ ^^^^^ ^^^ ^^ | |
| + %dropout.47 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.29) | |
| ? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^^ | |
| %model.293 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.291 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.293) | |
| - %_3.37 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.291) | |
| ? ^ | |
| + %_3.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.291) | |
| ? ^ | |
| - %feed_forward.29 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.37) | |
| ? ^ ^ | |
| + %feed_forward.27 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.35) | |
| ? ^ ^ | |
| - %dropout.47 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.29) | |
| + %activation.7 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.27) | |
| %model.291 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.289 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.291) | |
| - %_3.35 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.289) | |
| ? ^ | |
| + %_3.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.289) | |
| ? ^ | |
| - %feed_forward.27 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.35) | |
| ? ^ ^ | |
| + %feed_forward.25 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.33) | |
| ? ^ ^ | |
| - %activation.7 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.27) | |
| ? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
| + %w_1.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.25) | |
| ? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
| %model.289 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.287 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.289) | |
| - %_3.33 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.287) | |
| ? ^ | |
| + %_3.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.287) | |
| ? ^ | |
| + %output_sublayer.7 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_3.31) | |
| - %feed_forward.25 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_3.33) | |
| - %w_1.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.25) | |
| %model.287 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.285 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.287) | |
| - %_3.31 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.285) | |
| ? ^^ | |
| + %_3.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.285) | |
| ? ^^ | |
| - %output_sublayer.7 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_3.31) | |
| + %lambda_module.49 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.29) | |
| + %attention.247 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.49) | |
| + %output_linear.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.247) | |
| %model.285 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.283 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.285) | |
| - %_3.29 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.283) | |
| ? ^ | |
| + %_3.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.283) | |
| ? ^ | |
| - %lambda_module.47 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.29) | |
| ? ^ | |
| + %lambda_module.47 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.27) | |
| ? ^ | |
| - %attention.247 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.47) | |
| ? ^ | |
| + %attention.245 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.47) | |
| ? ^ | |
| - %output_linear.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.247) | |
| + %dropout.41 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.245) | |
| + %dropout.43 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.41) | |
| %model.283 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.281 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.283) | |
| - %_3.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.281) | |
| ? ^ | |
| + %_3.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.281) | |
| ? ^ | |
| - %lambda_module.45 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.27) | |
| ? ^ | |
| + %lambda_module.45 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.25) | |
| ? ^ | |
| - %attention.245 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.45) | |
| ? ^ | |
| + %attention.241 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.45) | |
| ? ^ | |
| + %attention.243 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.241) | |
| - %dropout.41 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.245) | |
| - %dropout.43 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.41) | |
| %model.281 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.279 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.281) | |
| - %_3.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.279) | |
| ? ^ | |
| + %_3.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.279) | |
| ? ^ | |
| - %lambda_module.43 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.25) | |
| ? ^ | |
| + %lambda_module.43 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.23) | |
| ? ^ | |
| - %attention.241 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.43) | |
| ? ^^ | |
| + %attention.239 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.43) | |
| ? ^^ | |
| - %attention.243 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.241) | |
| + %linear_layers.167 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.239) | |
| + %_2.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.167) | |
| %model.279 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.277 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.279) | |
| - %_3.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.277) | |
| ? ^ | |
| + %_3.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.277) | |
| ? ^ | |
| - %lambda_module.41 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.23) | |
| ? ^ | |
| + %lambda_module.41 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.21) | |
| ? ^ | |
| - %attention.239 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.41) | |
| ? ^ | |
| + %attention.237 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.41) | |
| ? ^ | |
| - %linear_layers.167 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.239) | |
| ? ^ ^ | |
| + %linear_layers.165 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.237) | |
| ? ^ ^ | |
| - %_2.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.167) | |
| ? ^ ^ ^ | |
| + %_1.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.165) | |
| ? ^ ^ ^ | |
| %model.277 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.275 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.277) | |
| - %_3.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.275) | |
| ? - | |
| + %_3.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.275) | |
| ? + | |
| - %lambda_module.39 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.21) | |
| ? - | |
| + %lambda_module.39 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.19) | |
| ? + | |
| - %attention.237 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.39) | |
| ? ^ | |
| + %attention.235 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.39) | |
| ? ^ | |
| - %linear_layers.165 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.237) | |
| ? ^ ^ | |
| + %linear_layers.163 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.235) | |
| ? ^ ^ | |
| - %_1.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.165) | |
| ? ^ ^ ^ ^ | |
| + %_0.99 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.163) | |
| ? ^ ^ ^ ^ | |
| %model.275 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.273 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.275) | |
| - %_3.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.273) | |
| ? ^ | |
| + %_3.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.273) | |
| ? ^ | |
| + %input_sublayer.7 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_3.17) | |
| - %lambda_module.37 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_3.19) | |
| - %attention.235 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.37) | |
| - %linear_layers.163 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.235) | |
| - %_0.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.163) | |
| %model.273 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.271 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.273) | |
| - %_3.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.271) | |
| ? ^ ^^ ^ | |
| + %_2.95 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.271) | |
| ? ^ ^^ ^ | |
| - %input_sublayer.7 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_3.17) | |
| + %dropout.37 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_2.95) | |
| %model.271 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.269 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.271) | |
| - %_2.95 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.269) | |
| ? ^ | |
| + %_2.93 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.269) | |
| ? ^ | |
| - %dropout.37 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_2.95) | |
| + %feed_forward.23 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.93) | |
| + %w_2.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.23) | |
| %model.269 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.267 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.269) | |
| - %_2.93 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.267) | |
| ? ^ | |
| + %_2.91 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.267) | |
| ? ^ | |
| - %feed_forward.23 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.93) | |
| ? ^ ^ | |
| + %feed_forward.21 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.91) | |
| ? ^ ^ | |
| - %w_2.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.23) | |
| ? ^^^ ^^^^^ ^^^^^ ^^^ ^ | |
| + %dropout.35 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.21) | |
| ? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^ | |
| %model.267 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.265 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.267) | |
| - %_2.91 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.265) | |
| ? - | |
| + %_2.89 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.265) | |
| ? + | |
| - %feed_forward.21 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.91) | |
| ? - - | |
| + %feed_forward.19 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.89) | |
| ? + + | |
| - %dropout.35 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.21) | |
| + %activation.5 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.19) | |
| %model.265 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.263 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.265) | |
| - %_2.89 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.263) | |
| ? ^ | |
| + %_2.87 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.263) | |
| ? ^ | |
| - %feed_forward.19 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.89) | |
| ? ^ ^ | |
| + %feed_forward.17 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.87) | |
| ? ^ ^ | |
| - %activation.5 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.19) | |
| ? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
| + %w_1.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.17) | |
| ? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
| %model.263 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.261 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.263) | |
| - %_2.87 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.261) | |
| ? ^ | |
| + %_2.85 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.261) | |
| ? ^ | |
| + %output_sublayer.5 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_2.85) | |
| - %feed_forward.17 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_2.87) | |
| - %w_1.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.17) | |
| %model.261 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.259 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.261) | |
| - %_2.85 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.259) | |
| ? ^ | |
| + %_2.83 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.259) | |
| ? ^ | |
| - %output_sublayer.5 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_2.85) | |
| + %lambda_module.37 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.83) | |
| + %attention.233 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.37) | |
| + %output_linear.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.233) | |
| %model.259 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.257 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.259) | |
| - %_2.83 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.257) | |
| ? ^ | |
| + %_2.81 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.257) | |
| ? ^ | |
| - %lambda_module.35 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.83) | |
| ? ^ | |
| + %lambda_module.35 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.81) | |
| ? ^ | |
| - %attention.233 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.35) | |
| ? ^ | |
| + %attention.231 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.35) | |
| ? ^ | |
| - %output_linear.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.233) | |
| + %dropout.29 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.231) | |
| + %dropout.31 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.29) | |
| %model.257 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.255 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.257) | |
| - %_2.81 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.255) | |
| ? ^^ | |
| + %_2.79 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.255) | |
| ? ^^ | |
| - %lambda_module.33 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.81) | |
| ? ^^ | |
| + %lambda_module.33 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.79) | |
| ? ^^ | |
| - %attention.231 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.33) | |
| ? ^^ | |
| + %attention.227 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.33) | |
| ? ^^ | |
| + %attention.229 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.227) | |
| - %dropout.29 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.231) | |
| - %dropout.31 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.29) | |
| %model.255 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.253 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.255) | |
| - %_2.79 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.253) | |
| ? ^ | |
| + %_2.75 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.253) | |
| ? ^ | |
| - %lambda_module.31 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.79) | |
| ? ^ | |
| + %lambda_module.31 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.75) | |
| ? ^ | |
| - %attention.227 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.31) | |
| ? ^ | |
| + %attention.225 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.31) | |
| ? ^ | |
| - %attention.229 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.227) | |
| + %linear_layers.161 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.225) | |
| + %_2.77 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.161) | |
| %model.253 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.251 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.253) | |
| - %_2.75 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.251) | |
| ? ^ | |
| + %_2.73 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.251) | |
| ? ^ | |
| - %lambda_module.29 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.75) | |
| ? ^ | |
| + %lambda_module.29 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.73) | |
| ? ^ | |
| - %attention.225 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.29) | |
| ? ^ | |
| + %attention.223 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.29) | |
| ? ^ | |
| - %linear_layers.161 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.225) | |
| ? ^^ ^ | |
| + %linear_layers.159 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.223) | |
| ? ^^ ^ | |
| - %_2.77 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.161) | |
| ? ^ ^^ ^ ^^ | |
| + %_1.95 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.159) | |
| ? ^ ^^ ^ ^^ | |
| %model.251 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.249 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.251) | |
| - %_2.73 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.249) | |
| ? ^ | |
| + %_2.71 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.249) | |
| ? ^ | |
| - %lambda_module.27 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.73) | |
| ? ^ | |
| + %lambda_module.27 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.71) | |
| ? ^ | |
| - %attention.223 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.27) | |
| ? ^ | |
| + %attention.221 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.27) | |
| ? ^ | |
| - %linear_layers.159 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.223) | |
| ? ^ ^ | |
| + %linear_layers.157 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.221) | |
| ? ^ ^ | |
| - %_1.95 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.159) | |
| ? ^ ^ ^ ^ | |
| + %_0.97 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.157) | |
| ? ^ ^ ^ ^ | |
| %model.249 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.247 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.249) | |
| - %_2.71 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.247) | |
| ? ^^ | |
| + %_2.69 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.247) | |
| ? ^^ | |
| + %input_sublayer.5 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_2.69) | |
| - %lambda_module.25 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_2.71) | |
| - %attention.221 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.25) | |
| - %linear_layers.157 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.221) | |
| - %_0.95 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.157) | |
| %model.247 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.245 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.247) | |
| - %_2.69 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.245) | |
| ? ^ - ^ | |
| + %_1.93 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.245) | |
| ? ^ + ^ | |
| - %input_sublayer.5 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_2.69) | |
| + %dropout.25 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_1.93) | |
| %model.245 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.243 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.245) | |
| - %_1.93 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.243) | |
| ? ^ | |
| + %_1.91 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.243) | |
| ? ^ | |
| - %dropout.25 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_1.93) | |
| + %feed_forward.15 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.91) | |
| + %w_2.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.15) | |
| %model.243 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.241 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.243) | |
| - %_1.91 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.241) | |
| ? - | |
| + %_1.89 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.241) | |
| ? + | |
| - %feed_forward.15 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.91) | |
| ? ^ - | |
| + %feed_forward.13 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.89) | |
| ? ^ + | |
| - %w_2.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.15) | |
| ? ^^ - ^^^^^ ^^^^^ ^^^ ^ | |
| + %dropout.23 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.13) | |
| ? ^^^^^^^^ ^ +++++ ^ +++++ ^^^^^^^ ^ | |
| %model.241 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.239 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.241) | |
| - %_1.89 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.239) | |
| ? ^ | |
| + %_1.87 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.239) | |
| ? ^ | |
| - %feed_forward.13 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.89) | |
| ? ^ ^ | |
| + %feed_forward.11 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.87) | |
| ? ^ ^ | |
| - %dropout.23 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.13) | |
| + %activation.3 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.11) | |
| %model.239 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.237 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.239) | |
| - %_1.87 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.237) | |
| ? ^ | |
| + %_1.85 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.237) | |
| ? ^ | |
| - %feed_forward.11 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.87) | |
| ? ^^ ^ | |
| + %feed_forward.9 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.85) | |
| ? ^ ^ | |
| - %activation.3 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.11) | |
| ? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^^ | |
| + %w_1.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.9) | |
| ? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
| %model.237 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.235 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.237) | |
| - %_1.85 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.235) | |
| ? ^ | |
| + %_1.83 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.235) | |
| ? ^ | |
| + %output_sublayer.3 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_1.83) | |
| - %feed_forward.9 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_1.85) | |
| - %w_1.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.9) | |
| %model.235 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.233 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.235) | |
| - %_1.83 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.233) | |
| ? ^ | |
| + %_1.81 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.233) | |
| ? ^ | |
| - %output_sublayer.3 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_1.83) | |
| + %lambda_module.25 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.81) | |
| + %attention.219 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.25) | |
| + %output_linear.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.219) | |
| %model.233 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.231 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.233) | |
| - %_1.81 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.231) | |
| ? ^^ | |
| + %_1.79 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.231) | |
| ? ^^ | |
| - %lambda_module.23 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.81) | |
| ? ^^ | |
| + %lambda_module.23 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.79) | |
| ? ^^ | |
| - %attention.219 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.23) | |
| ? ^ | |
| + %attention.217 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.23) | |
| ? ^ | |
| - %output_linear.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.219) | |
| + %dropout.17 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.217) | |
| + %dropout.19 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.17) | |
| %model.231 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.229 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.231) | |
| - %_1.79 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.229) | |
| ? ^ | |
| + %_1.77 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.229) | |
| ? ^ | |
| - %lambda_module.21 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.79) | |
| ? ^ | |
| + %lambda_module.21 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.77) | |
| ? ^ | |
| - %attention.217 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.21) | |
| ? ^ | |
| + %attention.213 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.21) | |
| ? ^ | |
| + %attention.215 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.213) | |
| - %dropout.17 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.217) | |
| - %dropout.19 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.17) | |
| %model.229 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.227 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.229) | |
| - %_1.77 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.227) | |
| ? ^ | |
| + %_1.75 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.227) | |
| ? ^ | |
| - %lambda_module.19 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.77) | |
| ? ^ | |
| + %lambda_module.19 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.75) | |
| ? ^ | |
| - %attention.213 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.19) | |
| ? ^ | |
| + %attention.211 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.19) | |
| ? ^ | |
| - %attention.215 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.213) | |
| + %linear_layers.155 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.211) | |
| + %_2.67 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.155) | |
| %model.227 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.225 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.227) | |
| - %_1.75 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.225) | |
| ? ^ | |
| + %_1.71 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.225) | |
| ? ^ | |
| - %lambda_module.17 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.75) | |
| ? ^ | |
| + %lambda_module.17 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.71) | |
| ? ^ | |
| - %attention.211 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.17) | |
| ? ^^ | |
| + %attention.209 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.17) | |
| ? ^^ | |
| - %linear_layers.155 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.211) | |
| ? ^ ^^ | |
| + %linear_layers.153 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.209) | |
| ? ^ ^^ | |
| - %_2.67 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.155) | |
| ? ^ - ^ ^ | |
| + %_1.73 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.153) | |
| ? ^ + ^ ^ | |
| %model.225 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.223 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.225) | |
| - %_1.71 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.223) | |
| ? ^^ | |
| + %_1.69 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.223) | |
| ? ^^ | |
| - %lambda_module.15 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.71) | |
| ? ^^ | |
| + %lambda_module.15 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.69) | |
| ? ^^ | |
| - %attention.209 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.15) | |
| ? ^ | |
| + %attention.207 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.15) | |
| ? ^ | |
| - %linear_layers.153 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.209) | |
| ? ^ ^ | |
| + %linear_layers.151 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.207) | |
| ? ^ ^ | |
| - %_1.73 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.153) | |
| ? ^ ^^ ^ ^ | |
| + %_0.95 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.151) | |
| ? ^ ^^ ^ ^ | |
| %model.223 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.221 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.223) | |
| - %_1.69 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.221) | |
| ? ^ | |
| + %_1.67 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.221) | |
| ? ^ | |
| + %input_sublayer.3 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_1.67) | |
| - %lambda_module.13 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_1.69) | |
| - %attention.207 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.13) | |
| - %linear_layers.151 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.207) | |
| - %_0.93 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.151) | |
| %model.221 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.219 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.221) | |
| - %_1.67 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.219) | |
| ? ^ ^^ ^ | |
| + %_0.93 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.219) | |
| ? ^ ^^ ^ | |
| - %input_sublayer.3 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_1.67) | |
| + %dropout.13 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_0.93) | |
| %model.219 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.217 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.219) | |
| %_0.91 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.217) | |
| - %dropout.13 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%_0.91) | |
| + %feed_forward.7 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.91) | |
| + %w_2.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.7) | |
| %model.217 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.215 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.217) | |
| %_0.89 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.215) | |
| - %feed_forward.7 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.89) | |
| ? ^ | |
| + %feed_forward.5 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.89) | |
| ? ^ | |
| - %w_2.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_2"](%feed_forward.7) | |
| ? ^^^ ^^^^^ ^^^^^ ^^^ ^ | |
| + %dropout.11 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.5) | |
| ? ^^^^^^^ + ^ +++++ ^ +++++ ^^^^^^^ ^ | |
| %model.215 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.213 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.215) | |
| %_0.87 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.213) | |
| - %feed_forward.5 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.87) | |
| ? ^ | |
| + %feed_forward.3 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.87) | |
| ? ^ | |
| - %dropout.11 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%feed_forward.5) | |
| + %activation.1 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.3) | |
| %model.213 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.211 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.213) | |
| %_0.85 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.211) | |
| - %feed_forward.3 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.85) | |
| ? ^ | |
| + %feed_forward.1 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.85) | |
| ? ^ | |
| - %activation.1 : __torch__.torch.nn.modules.activation.GELU = prim::GetAttr[name="activation"](%feed_forward.3) | |
| ? ^^^^^^^^^^ ^^ ^ ^^^^^^^^^ ^^^^^^^^^^ ^ | |
| + %w_1.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.1) | |
| ? ^^^ ++++ ^^^ ^^ ^ ^^^ ^ | |
| %model.211 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.209 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.211) | |
| %_0.83 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.209) | |
| + %output_sublayer.1 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_0.83) | |
| - %feed_forward.1 : __torch__.bert_pytorch.model.utils.feed_forward.PositionwiseFeedForward = prim::GetAttr[name="feed_forward"](%_0.83) | |
| - %w_1.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="w_1"](%feed_forward.1) | |
| %model.209 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.207 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.209) | |
| %_0.81 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.207) | |
| - %output_sublayer.1 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="output_sublayer"](%_0.81) | |
| + %lambda_module.13 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.81) | |
| + %attention.205 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.13) | |
| + %output_linear.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.205) | |
| %model.207 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.205 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.207) | |
| %_0.79 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.205) | |
| %lambda_module.11 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.79) | |
| - %attention.205 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.11) | |
| ? ^ | |
| + %attention.203 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.11) | |
| ? ^ | |
| - %output_linear.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.205) | |
| + %dropout.5 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.203) | |
| + %dropout.7 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.5) | |
| %model.205 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.203 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.205) | |
| %_0.77 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.203) | |
| %lambda_module.9 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.77) | |
| - %attention.203 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.9) | |
| ? ^^^ | |
| + %attention.199 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.9) | |
| ? ^^^ | |
| + %attention.201 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.199) | |
| - %dropout.5 : __torch__.bert_pytorch.model.attention.multi_head.DropoutWrapper = prim::GetAttr[name="dropout"](%attention.203) | |
| - %dropout.7 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%dropout.5) | |
| %model.203 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.201 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.203) | |
| %_0.75 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.201) | |
| %lambda_module.7 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.75) | |
| - %attention.199 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.7) | |
| ? ^ | |
| + %attention.197 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.7) | |
| ? ^ | |
| - %attention.201 : __torch__.bert_pytorch.model.attention.single.Attention = prim::GetAttr[name="attention"](%attention.199) | |
| + %linear_layers.149 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.197) | |
| + %_2.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.149) | |
| %model.201 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.199 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.201) | |
| %_0.73 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.199) | |
| %lambda_module.5 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.73) | |
| - %attention.197 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.5) | |
| ? ^ | |
| + %attention.195 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.5) | |
| ? ^ | |
| - %linear_layers.149 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.197) | |
| ? ^ ^ | |
| + %linear_layers.147 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.195) | |
| ? ^ ^ | |
| - %_2.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.149) | |
| ? ^ ^ ^ | |
| + %_1.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.147) | |
| ? ^ ^ ^ | |
| %model.199 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.197 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.199) | |
| - %_0.71 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.197) | |
| ? ^^ | |
| + %_0.69 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.197) | |
| ? ^^ | |
| - %lambda_module.3 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.71) | |
| ? ^^ | |
| + %lambda_module.3 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.69) | |
| ? ^^ | |
| - %attention.195 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.3) | |
| ? ^ | |
| + %attention.193 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.3) | |
| ? ^ | |
| - %linear_layers.147 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.195) | |
| ? ^ ^ | |
| + %linear_layers.145 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.193) | |
| ? ^ ^ | |
| - %_1.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.147) | |
| ? --- ^ ^ | |
| + %_0.71 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.145) | |
| ? +++ ^ ^ | |
| %model.197 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.195 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.197) | |
| %_0.67 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.195) | |
| + %input_sublayer.1 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_0.67) | |
| - %lambda_module.1 : __torch__.bert_pytorch.model.transformer.LambdaModule = prim::GetAttr[name="lambda_module"](%_0.67) | |
| - %attention.193 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%lambda_module.1) | |
| - %linear_layers.145 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.193) | |
| - %_0.69 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.145) | |
| %model.195 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| + %embedding : __torch__.bert_pytorch.model.embedding.bert.BERTEmbedding = prim::GetAttr[name="embedding"](%model.195) | |
| - %transformer_blocks.193 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.195) | |
| - %_0.65 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.193) | |
| - %input_sublayer.1 : __torch__.bert_pytorch.model.utils.sublayer.SublayerConnection = prim::GetAttr[name="input_sublayer"](%_0.65) | |
| %model.193 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| - %embedding : __torch__.bert_pytorch.model.embedding.bert.BERTEmbedding = prim::GetAttr[name="embedding"](%model.193) | |
| - %model.191 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| - %transformer_blocks.191 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.191) | |
| ? ^ ^ | |
| + %transformer_blocks.193 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.193) | |
| ? ^ ^ | |
| - %_11.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.191) | |
| ? ^ | |
| + %_11.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.193) | |
| ? ^ | |
| %attention.191 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.15) | |
| %output_linear.47 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.191) | |
| %bias.95 : Tensor = prim::GetAttr[name="bias"](%output_linear.47) | |
| - %model.189 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? - | |
| + %model.191 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? + | |
| - %transformer_blocks.189 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.189) | |
| ? - - | |
| + %transformer_blocks.191 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.191) | |
| ? + + | |
| - %_11.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.189) | |
| ? - | |
| + %_11.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.191) | |
| ? + | |
| %attention.189 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.13) | |
| %output_linear.45 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.189) | |
| %weight.99 : Tensor = prim::GetAttr[name="weight"](%output_linear.45) | |
| - %model.187 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.189 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.187 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.187) | |
| ? ^ ^ | |
| + %transformer_blocks.189 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.189) | |
| ? ^ ^ | |
| - %_11.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.187) | |
| ? ^ | |
| + %_11.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.189) | |
| ? ^ | |
| %attention.187 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.11) | |
| %linear_layers.143 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.187) | |
| %_2.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.143) | |
| %bias.93 : Tensor = prim::GetAttr[name="bias"](%_2.63) | |
| - %model.185 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.187 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.185 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.185) | |
| ? ^ ^ | |
| + %transformer_blocks.187 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.187) | |
| ? ^ ^ | |
| - %_11.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.185) | |
| ? ^ | |
| + %_11.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.187) | |
| ? ^ | |
| %attention.185 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.9) | |
| %linear_layers.141 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.185) | |
| %_2.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.141) | |
| %weight.97 : Tensor = prim::GetAttr[name="weight"](%_2.61) | |
| - %model.183 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.185 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.183 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.183) | |
| ? ^ ^ | |
| + %transformer_blocks.185 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.185) | |
| ? ^ ^ | |
| - %_11.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.183) | |
| ? ^ | |
| + %_11.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.185) | |
| ? ^ | |
| %attention.183 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.7) | |
| %linear_layers.139 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.183) | |
| %_1.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.139) | |
| %bias.91 : Tensor = prim::GetAttr[name="bias"](%_1.63) | |
| - %model.181 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.183 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.181 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.181) | |
| ? ^ ^ | |
| + %transformer_blocks.183 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.183) | |
| ? ^ ^ | |
| - %_11.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.181) | |
| ? ^ | |
| + %_11.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.183) | |
| ? ^ | |
| %attention.181 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.5) | |
| %linear_layers.137 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.181) | |
| %_1.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.137) | |
| %weight.95 : Tensor = prim::GetAttr[name="weight"](%_1.61) | |
| + %model.181 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| + %transformer_blocks.181 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.181) | |
| + %_11.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.181) | |
| + %attention.179 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.3) | |
| + %linear_layers.135 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.179) | |
| + %_0.65 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.135) | |
| + %bias.89 : Tensor = prim::GetAttr[name="bias"](%_0.65) | |
| %model.179 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.179 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.179) | |
| - %_11.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.179) | |
| ? ^ | |
| + %_11.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.179) | |
| ? ^ | |
| - %attention.179 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.3) | |
| ? ^ ^ | |
| + %attention.177 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.1) | |
| ? ^ ^ | |
| - %linear_layers.135 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.179) | |
| ? ^ ^ | |
| + %linear_layers.133 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.177) | |
| ? ^ ^ | |
| - %_0.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.135) | |
| ? ^ | |
| + %_0.63 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.133) | |
| ? ^ | |
| - %bias.89 : Tensor = prim::GetAttr[name="bias"](%_0.63) | |
| ? ^ ^^ - ^ ^^ | |
| + %weight.93 : Tensor = prim::GetAttr[name="weight"](%_0.63) | |
| ? ^^ ^^^ + ^^ ^^^ | |
| %model.177 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.177 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.177) | |
| - %_11.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="11"](%transformer_blocks.177) | |
| - %attention.177 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_11.1) | |
| - %linear_layers.133 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.177) | |
| - %_0.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.133) | |
| - %weight.93 : Tensor = prim::GetAttr[name="weight"](%_0.61) | |
| - %model.175 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| - %transformer_blocks.175 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.175) | |
| - %_10.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.175) | |
| ? ^ | |
| + %_10.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.177) | |
| ? ^ | |
| %attention.175 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.15) | |
| %output_linear.43 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.175) | |
| %bias.87 : Tensor = prim::GetAttr[name="bias"](%output_linear.43) | |
| - %model.173 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.175 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.173 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.173) | |
| ? ^ ^ | |
| + %transformer_blocks.175 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.175) | |
| ? ^ ^ | |
| - %_10.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.173) | |
| ? ^ | |
| + %_10.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.175) | |
| ? ^ | |
| %attention.173 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.13) | |
| %output_linear.41 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.173) | |
| %weight.91 : Tensor = prim::GetAttr[name="weight"](%output_linear.41) | |
| - %model.171 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.173 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.171 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.171) | |
| ? ^ ^ | |
| + %transformer_blocks.173 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.173) | |
| ? ^ ^ | |
| - %_10.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.171) | |
| ? ^ | |
| + %_10.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.173) | |
| ? ^ | |
| %attention.171 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.11) | |
| %linear_layers.131 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.171) | |
| %_2.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.131) | |
| %bias.85 : Tensor = prim::GetAttr[name="bias"](%_2.59) | |
| - %model.169 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^^ | |
| + %model.171 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^^ | |
| - %transformer_blocks.169 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.169) | |
| ? ^^ ^^ | |
| + %transformer_blocks.171 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.171) | |
| ? ^^ ^^ | |
| - %_10.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.169) | |
| ? ^^ | |
| + %_10.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.171) | |
| ? ^^ | |
| %attention.169 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.9) | |
| %linear_layers.129 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.169) | |
| %_2.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.129) | |
| %weight.89 : Tensor = prim::GetAttr[name="weight"](%_2.57) | |
| - %model.167 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.169 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.167 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.167) | |
| ? ^ ^ | |
| + %transformer_blocks.169 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.169) | |
| ? ^ ^ | |
| - %_10.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.167) | |
| ? ^ | |
| + %_10.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.169) | |
| ? ^ | |
| %attention.167 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.7) | |
| %linear_layers.127 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.167) | |
| %_1.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.127) | |
| %bias.83 : Tensor = prim::GetAttr[name="bias"](%_1.59) | |
| - %model.165 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.167 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.165 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.165) | |
| ? ^ ^ | |
| + %transformer_blocks.167 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.167) | |
| ? ^ ^ | |
| - %_10.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.165) | |
| ? ^ | |
| + %_10.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.167) | |
| ? ^ | |
| %attention.165 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.5) | |
| %linear_layers.125 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.165) | |
| %_1.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.125) | |
| %weight.87 : Tensor = prim::GetAttr[name="weight"](%_1.57) | |
| + %model.165 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| + %transformer_blocks.165 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.165) | |
| + %_10.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.165) | |
| + %attention.163 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.3) | |
| + %linear_layers.123 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.163) | |
| + %_0.61 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.123) | |
| + %bias.81 : Tensor = prim::GetAttr[name="bias"](%_0.61) | |
| %model.163 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.163 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.163) | |
| - %_10.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.163) | |
| ? ^ | |
| + %_10.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.163) | |
| ? ^ | |
| - %attention.163 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.3) | |
| ? ^ ^ | |
| + %attention.161 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.1) | |
| ? ^ ^ | |
| - %linear_layers.123 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.163) | |
| ? ^ ^ | |
| + %linear_layers.121 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.161) | |
| ? ^ ^ | |
| - %_0.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.123) | |
| ? ^ | |
| + %_0.59 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.121) | |
| ? ^ | |
| - %bias.81 : Tensor = prim::GetAttr[name="bias"](%_0.59) | |
| ? ^ ^^ ^ ^ ^^ | |
| + %weight.85 : Tensor = prim::GetAttr[name="weight"](%_0.59) | |
| ? ^^ ^^^ ^ ^^ ^^^ | |
| %model.161 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.161 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.161) | |
| - %_10.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="10"](%transformer_blocks.161) | |
| - %attention.161 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_10.1) | |
| - %linear_layers.121 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.161) | |
| - %_0.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.121) | |
| - %weight.85 : Tensor = prim::GetAttr[name="weight"](%_0.57) | |
| - %model.159 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| - %transformer_blocks.159 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.159) | |
| - %_9.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.159) | |
| ? ^^ | |
| + %_9.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.161) | |
| ? ^^ | |
| %attention.159 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.15) | |
| %output_linear.39 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.159) | |
| %bias.79 : Tensor = prim::GetAttr[name="bias"](%output_linear.39) | |
| - %model.157 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.159 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.157 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.157) | |
| ? ^ ^ | |
| + %transformer_blocks.159 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.159) | |
| ? ^ ^ | |
| - %_9.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.157) | |
| ? ^ | |
| + %_9.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.159) | |
| ? ^ | |
| %attention.157 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.13) | |
| %output_linear.37 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.157) | |
| %weight.83 : Tensor = prim::GetAttr[name="weight"](%output_linear.37) | |
| - %model.155 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.157 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.155 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.155) | |
| ? ^ ^ | |
| + %transformer_blocks.157 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.157) | |
| ? ^ ^ | |
| - %_9.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.155) | |
| ? ^ | |
| + %_9.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.157) | |
| ? ^ | |
| %attention.155 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.11) | |
| %linear_layers.119 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.155) | |
| %_2.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.119) | |
| %bias.77 : Tensor = prim::GetAttr[name="bias"](%_2.55) | |
| - %model.153 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.155 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.153 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.153) | |
| ? ^ ^ | |
| + %transformer_blocks.155 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.155) | |
| ? ^ ^ | |
| - %_9.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.153) | |
| ? ^ | |
| + %_9.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.155) | |
| ? ^ | |
| %attention.153 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.9) | |
| %linear_layers.117 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.153) | |
| %_2.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.117) | |
| %weight.81 : Tensor = prim::GetAttr[name="weight"](%_2.53) | |
| - %model.151 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.153 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.151 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.151) | |
| ? ^ ^ | |
| + %transformer_blocks.153 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.153) | |
| ? ^ ^ | |
| - %_9.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.151) | |
| ? ^ | |
| + %_9.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.153) | |
| ? ^ | |
| %attention.151 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.7) | |
| %linear_layers.115 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.151) | |
| %_1.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.115) | |
| %bias.75 : Tensor = prim::GetAttr[name="bias"](%_1.55) | |
| - %model.149 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^^ | |
| + %model.151 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^^ | |
| - %transformer_blocks.149 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.149) | |
| ? ^^ ^^ | |
| + %transformer_blocks.151 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.151) | |
| ? ^^ ^^ | |
| - %_9.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.149) | |
| ? ^^ | |
| + %_9.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.151) | |
| ? ^^ | |
| %attention.149 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.5) | |
| %linear_layers.113 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.149) | |
| %_1.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.113) | |
| %weight.79 : Tensor = prim::GetAttr[name="weight"](%_1.53) | |
| + %model.149 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| + %transformer_blocks.149 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.149) | |
| + %_9.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.149) | |
| + %attention.147 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.3) | |
| + %linear_layers.111 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.147) | |
| + %_0.57 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.111) | |
| + %bias.73 : Tensor = prim::GetAttr[name="bias"](%_0.57) | |
| %model.147 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.147 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.147) | |
| - %_9.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.147) | |
| ? ^ | |
| + %_9.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.147) | |
| ? ^ | |
| - %attention.147 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.3) | |
| ? ^ ^ | |
| + %attention.145 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.1) | |
| ? ^ ^ | |
| - %linear_layers.111 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.147) | |
| ? ^^ ^ | |
| + %linear_layers.109 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.145) | |
| ? ^^ ^ | |
| - %_0.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.111) | |
| ? ^^ | |
| + %_0.55 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.109) | |
| ? ^^ | |
| - %bias.73 : Tensor = prim::GetAttr[name="bias"](%_0.55) | |
| ? ^ ^^ ^ ^ ^^ | |
| + %weight.77 : Tensor = prim::GetAttr[name="weight"](%_0.55) | |
| ? ^^ ^^^ ^ ^^ ^^^ | |
| %model.145 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.145 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.145) | |
| - %_9.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="9"](%transformer_blocks.145) | |
| - %attention.145 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_9.1) | |
| - %linear_layers.109 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.145) | |
| - %_0.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.109) | |
| - %weight.77 : Tensor = prim::GetAttr[name="weight"](%_0.53) | |
| - %model.143 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| - %transformer_blocks.143 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.143) | |
| - %_8.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.143) | |
| ? ^ | |
| + %_8.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.145) | |
| ? ^ | |
| %attention.143 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.15) | |
| %output_linear.35 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.143) | |
| %bias.71 : Tensor = prim::GetAttr[name="bias"](%output_linear.35) | |
| - %model.141 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.143 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.141 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.141) | |
| ? ^ ^ | |
| + %transformer_blocks.143 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.143) | |
| ? ^ ^ | |
| - %_8.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.141) | |
| ? ^ | |
| + %_8.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.143) | |
| ? ^ | |
| %attention.141 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.13) | |
| %output_linear.33 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.141) | |
| %weight.75 : Tensor = prim::GetAttr[name="weight"](%output_linear.33) | |
| - %model.139 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^^ | |
| + %model.141 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^^ | |
| - %transformer_blocks.139 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.139) | |
| ? ^^ ^^ | |
| + %transformer_blocks.141 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.141) | |
| ? ^^ ^^ | |
| - %_8.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.139) | |
| ? ^^ | |
| + %_8.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.141) | |
| ? ^^ | |
| %attention.139 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.11) | |
| %linear_layers.107 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.139) | |
| %_2.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.107) | |
| %bias.69 : Tensor = prim::GetAttr[name="bias"](%_2.51) | |
| - %model.137 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.139 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.137 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.137) | |
| ? ^ ^ | |
| + %transformer_blocks.139 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.139) | |
| ? ^ ^ | |
| - %_8.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.137) | |
| ? ^ | |
| + %_8.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.139) | |
| ? ^ | |
| %attention.137 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.9) | |
| %linear_layers.105 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.137) | |
| %_2.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.105) | |
| %weight.73 : Tensor = prim::GetAttr[name="weight"](%_2.49) | |
| - %model.135 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.137 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.135 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.135) | |
| ? ^ ^ | |
| + %transformer_blocks.137 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.137) | |
| ? ^ ^ | |
| - %_8.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.135) | |
| ? ^ | |
| + %_8.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.137) | |
| ? ^ | |
| %attention.135 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.7) | |
| %linear_layers.103 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.135) | |
| %_1.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.103) | |
| %bias.67 : Tensor = prim::GetAttr[name="bias"](%_1.51) | |
| - %model.133 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
+ %model.135 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.133 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.133)
? ^ ^
+ %transformer_blocks.135 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.135)
? ^ ^
- %_8.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.133)
? ^
+ %_8.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.135)
? ^
%attention.133 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.5)
%linear_layers.101 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.133)
%_1.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.101)
%weight.71 : Tensor = prim::GetAttr[name="weight"](%_1.49)
+ %model.133 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.133 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.133)
+ %_8.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.133)
+ %attention.131 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.3)
+ %linear_layers.99 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.131)
+ %_0.53 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.99)
+ %bias.65 : Tensor = prim::GetAttr[name="bias"](%_0.53)
%model.131 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.131 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.131)
- %_8.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.131)
? ^
+ %_8.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.131)
? ^
- %attention.131 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.3)
? ^^ ^
+ %attention.129 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.1)
? ^^ ^
- %linear_layers.99 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.131)
? ^ ^^
+ %linear_layers.97 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.129)
? ^ ^^
- %_0.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.99)
? ^
+ %_0.51 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.97)
? ^
- %bias.65 : Tensor = prim::GetAttr[name="bias"](%_0.51)
? ^ ^^ ^ ^ ^^
+ %weight.69 : Tensor = prim::GetAttr[name="weight"](%_0.51)
? ^^ ^^^ ^ ^^ ^^^
%model.129 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.129 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.129)
- %_8.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="8"](%transformer_blocks.129)
- %attention.129 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_8.1)
- %linear_layers.97 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.129)
- %_0.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.97)
- %weight.69 : Tensor = prim::GetAttr[name="weight"](%_0.49)
- %model.127 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.127 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.127)
- %_7.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.127)
? ^
+ %_7.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.129)
? ^
%attention.127 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.15)
%output_linear.31 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.127)
%bias.63 : Tensor = prim::GetAttr[name="bias"](%output_linear.31)
- %model.125 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.127 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.125 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.125)
? ^ ^
+ %transformer_blocks.127 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.127)
? ^ ^
- %_7.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.125)
? ^
+ %_7.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.127)
? ^
%attention.125 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.13)
%output_linear.29 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.125)
%weight.67 : Tensor = prim::GetAttr[name="weight"](%output_linear.29)
- %model.123 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.125 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.123 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.123)
? ^ ^
+ %transformer_blocks.125 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.125)
? ^ ^
- %_7.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.123)
? ^
+ %_7.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.125)
? ^
%attention.123 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.11)
%linear_layers.95 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.123)
%_2.47 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.95)
%bias.61 : Tensor = prim::GetAttr[name="bias"](%_2.47)
- %model.121 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.123 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.121 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.121)
? ^ ^
+ %transformer_blocks.123 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.123)
? ^ ^
- %_7.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.121)
? ^
+ %_7.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.123)
? ^
%attention.121 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.9)
%linear_layers.93 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.121)
%_2.45 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.93)
%weight.65 : Tensor = prim::GetAttr[name="weight"](%_2.45)
- %model.119 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? -
+ %model.121 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? +
- %transformer_blocks.119 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.119)
? - -
+ %transformer_blocks.121 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.121)
? + +
- %_7.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.119)
? -
+ %_7.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.121)
? +
%attention.119 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.7)
%linear_layers.91 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.119)
%_1.47 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.91)
%bias.59 : Tensor = prim::GetAttr[name="bias"](%_1.47)
- %model.117 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.119 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.117 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.117)
? ^ ^
+ %transformer_blocks.119 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.119)
? ^ ^
- %_7.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.117)
? ^
+ %_7.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.119)
? ^
%attention.117 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.5)
%linear_layers.89 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.117)
%_1.45 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.89)
%weight.63 : Tensor = prim::GetAttr[name="weight"](%_1.45)
+ %model.117 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.117 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.117)
+ %_7.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.117)
+ %attention.115 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.3)
+ %linear_layers.87 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.115)
+ %_0.49 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.87)
+ %bias.57 : Tensor = prim::GetAttr[name="bias"](%_0.49)
%model.115 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.115 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.115)
- %_7.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.115)
? ^
+ %_7.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.115)
? ^
- %attention.115 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.3)
? ^ ^
+ %attention.113 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.1)
? ^ ^
- %linear_layers.87 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.115)
? ^ ^
+ %linear_layers.85 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.113)
? ^ ^
- %_0.47 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.87)
? ^
+ %_0.47 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.85)
? ^
- %bias.57 : Tensor = prim::GetAttr[name="bias"](%_0.47)
? ^ ^^ ^^ ^ ^^
+ %weight.61 : Tensor = prim::GetAttr[name="weight"](%_0.47)
? ^^ ^^^ ^^ ^^ ^^^
%model.113 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.113 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.113)
- %_7.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="7"](%transformer_blocks.113)
- %attention.113 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_7.1)
- %linear_layers.85 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.113)
- %_0.45 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.85)
- %weight.61 : Tensor = prim::GetAttr[name="weight"](%_0.45)
- %model.111 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.111 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.111)
- %_6.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.111)
? ^
+ %_6.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.113)
? ^
%attention.111 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.15)
%output_linear.27 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.111)
%bias.55 : Tensor = prim::GetAttr[name="bias"](%output_linear.27)
- %model.109 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
+ %model.111 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
- %transformer_blocks.109 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.109)
? ^^ ^^
+ %transformer_blocks.111 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.111)
? ^^ ^^
- %_6.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.109)
? ^^
+ %_6.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.111)
? ^^
%attention.109 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.13)
%output_linear.25 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.109)
%weight.59 : Tensor = prim::GetAttr[name="weight"](%output_linear.25)
- %model.107 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.109 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.107 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.107)
? ^ ^
+ %transformer_blocks.109 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.109)
? ^ ^
- %_6.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.107)
? ^
+ %_6.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.109)
? ^
%attention.107 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.11)
%linear_layers.83 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.107)
%_2.43 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.83)
%bias.53 : Tensor = prim::GetAttr[name="bias"](%_2.43)
- %model.105 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.107 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.105 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.105)
? ^ ^
+ %transformer_blocks.107 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.107)
? ^ ^
- %_6.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.105)
? ^
+ %_6.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.107)
? ^
%attention.105 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.9)
%linear_layers.81 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.105)
%_2.41 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.81)
%weight.57 : Tensor = prim::GetAttr[name="weight"](%_2.41)
- %model.103 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.105 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.103 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.103)
? ^ ^
+ %transformer_blocks.105 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.105)
? ^ ^
- %_6.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.103)
? ^
+ %_6.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.105)
? ^
%attention.103 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.7)
%linear_layers.79 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.103)
%_1.43 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.79)
%bias.51 : Tensor = prim::GetAttr[name="bias"](%_1.43)
- %model.101 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.103 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.101 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.101)
? ^ ^
+ %transformer_blocks.103 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.103)
? ^ ^
- %_6.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.101)
? ^
+ %_6.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.103)
? ^
%attention.101 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.5)
%linear_layers.77 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.101)
%_1.41 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.77)
%weight.55 : Tensor = prim::GetAttr[name="weight"](%_1.41)
+ %model.101 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.101 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.101)
+ %_6.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.101)
+ %attention.99 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.3)
+ %linear_layers.75 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.99)
+ %_0.45 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.75)
+ %bias.49 : Tensor = prim::GetAttr[name="bias"](%_0.45)
%model.99 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.99 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.99)
- %_6.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.99)
? ^
+ %_6.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.99)
? ^
- %attention.99 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.3)
? ^ ^
+ %attention.97 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.1)
? ^ ^
- %linear_layers.75 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.99)
? ^ ^
+ %linear_layers.73 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.97)
? ^ ^
- %_0.43 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.75)
? ^
+ %_0.43 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.73)
? ^
- %bias.49 : Tensor = prim::GetAttr[name="bias"](%_0.43)
? ^ ^^ ^^ ^ ^^
+ %weight.53 : Tensor = prim::GetAttr[name="weight"](%_0.43)
? ^^ ^^^ ^^ ^^ ^^^
%model.97 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.97 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.97)
- %_6.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="6"](%transformer_blocks.97)
- %attention.97 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_6.1)
- %linear_layers.73 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.97)
- %_0.41 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.73)
- %weight.53 : Tensor = prim::GetAttr[name="weight"](%_0.41)
- %model.95 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.95 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.95)
- %_5.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.95)
? ^
+ %_5.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.97)
? ^
%attention.95 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.15)
%output_linear.23 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.95)
%bias.47 : Tensor = prim::GetAttr[name="bias"](%output_linear.23)
- %model.93 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.95 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.93 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.93)
? ^ ^
+ %transformer_blocks.95 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.95)
? ^ ^
- %_5.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.93)
? ^
+ %_5.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.95)
? ^
%attention.93 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.13)
%output_linear.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.93)
%weight.51 : Tensor = prim::GetAttr[name="weight"](%output_linear.21)
- %model.91 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.93 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.91 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.91)
? ^ ^
+ %transformer_blocks.93 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.93)
? ^ ^
- %_5.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.91)
? ^
+ %_5.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.93)
? ^
%attention.91 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.11)
%linear_layers.71 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.91)
%_2.39 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.71)
%bias.45 : Tensor = prim::GetAttr[name="bias"](%_2.39)
- %model.89 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? -
+ %model.91 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? +
- %transformer_blocks.89 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.89)
? - -
+ %transformer_blocks.91 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.91)
? + +
- %_5.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.89)
? -
+ %_5.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.91)
? +
%attention.89 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.9)
%linear_layers.69 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.89)
%_2.37 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.69)
%weight.49 : Tensor = prim::GetAttr[name="weight"](%_2.37)
- %model.87 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.89 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.87 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.87)
? ^ ^
+ %transformer_blocks.89 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.89)
? ^ ^
- %_5.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.87)
? ^
+ %_5.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.89)
? ^
%attention.87 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.7)
%linear_layers.67 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.87)
%_1.39 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.67)
%bias.43 : Tensor = prim::GetAttr[name="bias"](%_1.39)
- %model.85 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.87 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.85 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.85)
? ^ ^
+ %transformer_blocks.87 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.87)
? ^ ^
- %_5.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.85)
? ^
+ %_5.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.87)
? ^
%attention.85 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.5)
%linear_layers.65 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.85)
%_1.37 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.65)
%weight.47 : Tensor = prim::GetAttr[name="weight"](%_1.37)
+ %model.85 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.85 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.85)
+ %_5.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.85)
+ %attention.83 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.3)
+ %linear_layers.63 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.83)
+ %_0.41 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.63)
+ %bias.41 : Tensor = prim::GetAttr[name="bias"](%_0.41)
%model.83 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.83 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.83)
- %_5.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.83)
? ^
+ %_5.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.83)
? ^
- %attention.83 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.3)
? ^ ^
+ %attention.81 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.1)
? ^ ^
- %linear_layers.63 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.83)
? ^ ^
+ %linear_layers.61 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.81)
? ^ ^
- %_0.39 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.63)
? ^
+ %_0.39 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.61)
? ^
- %bias.41 : Tensor = prim::GetAttr[name="bias"](%_0.39)
? ^ ^^ ^ ^ ^^
+ %weight.45 : Tensor = prim::GetAttr[name="weight"](%_0.39)
? ^^ ^^^ ^ ^^ ^^^
%model.81 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.81 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.81)
- %_5.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="5"](%transformer_blocks.81)
- %attention.81 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_5.1)
- %linear_layers.61 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.81)
- %_0.37 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.61)
- %weight.45 : Tensor = prim::GetAttr[name="weight"](%_0.37)
- %model.79 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.79 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.79)
- %_4.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.79)
? ^^
+ %_4.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.81)
? ^^
%attention.79 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.15)
%output_linear.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.79)
%bias.39 : Tensor = prim::GetAttr[name="bias"](%output_linear.19)
- %model.77 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.79 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.77 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.77)
? ^ ^
+ %transformer_blocks.79 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.79)
? ^ ^
- %_4.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.77)
? ^
+ %_4.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.79)
? ^
%attention.77 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.13)
%output_linear.17 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.77)
%weight.43 : Tensor = prim::GetAttr[name="weight"](%output_linear.17)
- %model.75 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.77 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.75 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.75)
? ^ ^
+ %transformer_blocks.77 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.77)
? ^ ^
- %_4.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.75)
? ^
+ %_4.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.77)
? ^
%attention.75 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.11)
%linear_layers.59 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.75)
%_2.35 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.59)
%bias.37 : Tensor = prim::GetAttr[name="bias"](%_2.35)
- %model.73 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.75 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.73 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.73)
? ^ ^
+ %transformer_blocks.75 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.75)
? ^ ^
- %_4.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.73)
? ^
+ %_4.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.75)
? ^
%attention.73 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.9)
%linear_layers.57 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.73)
%_2.33 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.57)
%weight.41 : Tensor = prim::GetAttr[name="weight"](%_2.33)
- %model.71 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.73 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.71 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.71)
? ^ ^
+ %transformer_blocks.73 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.73)
? ^ ^
- %_4.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.71)
? ^
+ %_4.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.73)
? ^
%attention.71 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.7)
%linear_layers.55 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.71)
%_1.35 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.55)
%bias.35 : Tensor = prim::GetAttr[name="bias"](%_1.35)
- %model.69 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
+ %model.71 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
- %transformer_blocks.69 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.69)
? ^^ ^^
+ %transformer_blocks.71 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.71)
? ^^ ^^
- %_4.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.69)
? ^^
+ %_4.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.71)
? ^^
%attention.69 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.5)
%linear_layers.53 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.69)
%_1.33 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.53)
%weight.39 : Tensor = prim::GetAttr[name="weight"](%_1.33)
+ %model.69 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.69 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.69)
+ %_4.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.69)
+ %attention.67 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.3)
+ %linear_layers.51 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.67)
+ %_0.37 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.51)
+ %bias.33 : Tensor = prim::GetAttr[name="bias"](%_0.37)
%model.67 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.67 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.67)
- %_4.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.67)
? ^
+ %_4.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.67)
? ^
- %attention.67 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.3)
? ^ ^
+ %attention.65 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.1)
? ^ ^
- %linear_layers.51 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.67)
? ^^ ^
+ %linear_layers.49 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.65)
? ^^ ^
- %_0.35 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.51)
? ^^
+ %_0.35 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.49)
? ^^
- %bias.33 : Tensor = prim::GetAttr[name="bias"](%_0.35)
? ^ ^^ ^ ^ ^^
+ %weight.37 : Tensor = prim::GetAttr[name="weight"](%_0.35)
? ^^ ^^^ ^ ^^ ^^^
%model.65 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.65 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.65)
- %_4.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="4"](%transformer_blocks.65)
- %attention.65 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_4.1)
- %linear_layers.49 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.65)
- %_0.33 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.49)
- %weight.37 : Tensor = prim::GetAttr[name="weight"](%_0.33)
- %model.63 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.63 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.63)
- %_3.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.63)
? ^
+ %_3.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.65)
? ^
%attention.63 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.15)
%output_linear.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.63)
%bias.31 : Tensor = prim::GetAttr[name="bias"](%output_linear.15)
- %model.61 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.63 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.61 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.61)
? ^ ^
+ %transformer_blocks.63 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.63)
? ^ ^
- %_3.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.61)
? ^
+ %_3.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.63)
? ^
%attention.61 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.13)
%output_linear.13 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.61)
%weight.35 : Tensor = prim::GetAttr[name="weight"](%output_linear.13)
- %model.59 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
+ %model.61 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
- %transformer_blocks.59 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.59)
? ^^ ^^
+ %transformer_blocks.61 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.61)
? ^^ ^^
- %_3.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.59)
? ^^
+ %_3.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.61)
? ^^
%attention.59 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.11)
%linear_layers.47 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.59)
%_2.31 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.47)
%bias.29 : Tensor = prim::GetAttr[name="bias"](%_2.31)
- %model.57 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.59 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.57 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.57)
? ^ ^
+ %transformer_blocks.59 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.59)
? ^ ^
- %_3.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.57)
? ^
+ %_3.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.59)
? ^
%attention.57 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.9)
%linear_layers.45 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.57)
%_2.29 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.45)
%weight.33 : Tensor = prim::GetAttr[name="weight"](%_2.29)
- %model.55 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.57 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.55 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.55)
? ^ ^
+ %transformer_blocks.57 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.57)
? ^ ^
- %_3.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.55)
? ^
+ %_3.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.57)
? ^
%attention.55 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.7)
%linear_layers.43 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.55)
%_1.31 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.43)
%bias.27 : Tensor = prim::GetAttr[name="bias"](%_1.31)
- %model.53 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.55 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.53 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.53)
? ^ ^
+ %transformer_blocks.55 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.55)
? ^ ^
- %_3.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.53)
? ^
+ %_3.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.55)
? ^
%attention.53 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.5)
%linear_layers.41 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.53)
%_1.29 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.41)
%weight.31 : Tensor = prim::GetAttr[name="weight"](%_1.29)
+ %model.53 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.53 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.53)
+ %_3.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.53)
+ %attention.51 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.3)
+ %linear_layers.39 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.51)
+ %_0.33 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.39)
+ %bias.25 : Tensor = prim::GetAttr[name="bias"](%_0.33)
%model.51 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.51 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.51)
- %_3.3 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.51)
? ^
+ %_3.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.51)
? ^
- %attention.51 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.3)
? ^^ ^
+ %attention.49 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.1)
? ^^ ^
- %linear_layers.39 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.51)
? ^ ^^
+ %linear_layers.37 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.49)
? ^ ^^
- %_0.31 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.39)
? ^
+ %_0.31 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.37)
? ^
- %bias.25 : Tensor = prim::GetAttr[name="bias"](%_0.31)
? ^ ^^ ^ ^ ^^
+ %weight.29 : Tensor = prim::GetAttr[name="weight"](%_0.31)
? ^^ ^^^ ^ ^^ ^^^
%model.49 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.49 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.49)
- %_3.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="3"](%transformer_blocks.49)
- %attention.49 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_3.1)
- %linear_layers.37 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.49)
- %_0.29 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.37)
- %weight.29 : Tensor = prim::GetAttr[name="weight"](%_0.29)
- %model.47 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.47 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.47)
- %_2.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.47)
? ^
+ %_2.27 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.49)
? ^
%attention.47 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.27)
%output_linear.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.47)
%bias.23 : Tensor = prim::GetAttr[name="bias"](%output_linear.11)
- %model.45 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.47 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.45 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.45)
? ^ ^
+ %transformer_blocks.47 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.47)
? ^ ^
- %_2.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.45)
? ^
+ %_2.25 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.47)
? ^
%attention.45 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.25)
%output_linear.9 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.45)
%weight.27 : Tensor = prim::GetAttr[name="weight"](%output_linear.9)
- %model.43 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.45 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.43 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.43)
? ^ ^
+ %transformer_blocks.45 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.45)
? ^ ^
- %_2.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.43)
? ^
+ %_2.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.45)
? ^
%attention.43 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.21)
%linear_layers.35 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.43)
%_2.23 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.35)
%bias.21 : Tensor = prim::GetAttr[name="bias"](%_2.23)
- %model.41 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.43 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.41 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.41)
? ^ ^
+ %transformer_blocks.43 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.43)
? ^ ^
- %_2.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.41)
? ^
+ %_2.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.43)
? ^
%attention.41 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.17)
%linear_layers.33 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.41)
%_2.19 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.33)
%weight.25 : Tensor = prim::GetAttr[name="weight"](%_2.19)
- %model.39 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
+ %model.41 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^^
- %transformer_blocks.39 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.39)
? ^^ ^^
+ %transformer_blocks.41 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.41)
? ^^ ^^
- %_2.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.39)
? ^^
+ %_2.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.41)
? ^^
%attention.39 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.15)
%linear_layers.31 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.39)
%_1.27 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.31)
%bias.19 : Tensor = prim::GetAttr[name="bias"](%_1.27)
- %model.37 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
+ %model.39 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
? ^
- %transformer_blocks.37 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.37)
? ^ ^
+ %transformer_blocks.39 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.39)
? ^ ^
- %_2.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.37)
? ^
+ %_2.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.39)
? ^
%attention.37 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.13)
%linear_layers.29 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.37)
%_1.25 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.29)
%weight.23 : Tensor = prim::GetAttr[name="weight"](%_1.25)
+ %model.37 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
+ %transformer_blocks.37 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.37)
+ %_2.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.37)
+ %attention.35 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.11)
+ %linear_layers.27 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.35)
+ %_0.29 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.27)
+ %bias.17 : Tensor = prim::GetAttr[name="bias"](%_0.29)
%model.35 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.35 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.35)
- %_2.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.35)
? ^^
+ %_2.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.35)
? ^
- %attention.35 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.11)
? ^ ^^
+ %attention.33 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.9)
? ^ ^
- %linear_layers.27 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.35)
? ^ ^
+ %linear_layers.25 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.33)
? ^ ^
- %_0.27 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.27)
? ^
+ %_0.27 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.25)
? ^
- %bias.17 : Tensor = prim::GetAttr[name="bias"](%_0.27)
? ^ ^^ - ^ ^^
+ %weight.21 : Tensor = prim::GetAttr[name="weight"](%_0.27)
? ^^ ^^^ + ^^ ^^^
%model.33 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
%transformer_blocks.33 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.33)
- %_2.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="2"](%transformer_blocks.33)
- %attention.33 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_2.9)
- %linear_layers.25 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.33)
- %_0.25 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.25)
- %weight.21 : Tensor = prim::GetAttr[name="weight"](%_0.25)
- %model.31 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
- %transformer_blocks.31 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.31)
- %_1.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.31)
? ^
+ %_1.23 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.33)
? ^
%attention.31 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.23)
%output_linear.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.31)
%bias.15 : Tensor = prim::GetAttr[name="bias"](%output_linear.7)
- %model.29 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1)
| ? ^^ | |
| + %model.31 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^^ | |
| - %transformer_blocks.29 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.29) | |
| ? ^^ ^^ | |
| + %transformer_blocks.31 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.31) | |
| ? ^^ ^^ | |
| - %_1.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.29) | |
| ? ^^ | |
| + %_1.21 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.31) | |
| ? ^^ | |
| %attention.29 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.21) | |
| %output_linear.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.29) | |
| %weight.19 : Tensor = prim::GetAttr[name="weight"](%output_linear.5) | |
| - %model.27 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.29 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.27 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.27) | |
| ? ^ ^ | |
| + %transformer_blocks.29 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.29) | |
| ? ^ ^ | |
| - %_1.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.27) | |
| ? ^ | |
| + %_1.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.29) | |
| ? ^ | |
| %attention.27 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.19) | |
| %linear_layers.23 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.27) | |
| %_2.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.23) | |
| %bias.13 : Tensor = prim::GetAttr[name="bias"](%_2.7) | |
| - %model.25 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.27 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.25 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.25) | |
| ? ^ ^ | |
| + %transformer_blocks.27 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.27) | |
| ? ^ ^ | |
| - %_1.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.25) | |
| ? ^ | |
| + %_1.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.27) | |
| ? ^ | |
| %attention.25 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.17) | |
| %linear_layers.21 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.25) | |
| %_2.5 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.21) | |
| %weight.17 : Tensor = prim::GetAttr[name="weight"](%_2.5) | |
| - %model.23 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.25 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.23 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.23) | |
| ? ^ ^ | |
| + %transformer_blocks.25 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.25) | |
| ? ^ ^ | |
| - %_1.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.23) | |
| ? ^ | |
| + %_1.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.25) | |
| ? ^ | |
| %attention.23 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.13) | |
| %linear_layers.19 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.23) | |
| %_1.15 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.19) | |
| %bias.11 : Tensor = prim::GetAttr[name="bias"](%_1.15) | |
| - %model.21 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| + %model.23 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ^ | |
| - %transformer_blocks.21 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.21) | |
| ? ^ ^ | |
| + %transformer_blocks.23 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.23) | |
| ? ^ ^ | |
| - %_1.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.21) | |
| ? ^ | |
| + %_1.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.23) | |
| ? ^ | |
| %attention.21 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.9) | |
| %linear_layers.17 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.21) | |
| %_1.11 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.17) | |
| %weight.15 : Tensor = prim::GetAttr[name="weight"](%_1.11) | |
| + %model.21 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| + %transformer_blocks.21 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.21) | |
| + %_1.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.21) | |
| + %attention.19 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.7) | |
| + %linear_layers.15 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.19) | |
| + %_0.25 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.15) | |
| + %bias.9 : Tensor = prim::GetAttr[name="bias"](%_0.25) | |
| %model.19 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.19 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.19) | |
| - %_1.7 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.19) | |
| ? ^ | |
| + %_1.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.19) | |
| ? ^ | |
| - %attention.19 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.7) | |
| - %linear_layers.15 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.19) | |
| - %_0.23 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.15) | |
| - %bias.9 : Tensor = prim::GetAttr[name="bias"](%_0.23) | |
| - %model.17 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| - %transformer_blocks.17 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.17) | |
| - %_1.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="1"](%transformer_blocks.17) | |
| %attention.17 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_1.5) | |
| %linear_layers.13 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.17) | |
| - %_0.21 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.13) | |
| ? ^ | |
| + %_0.23 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.13) | |
| ? ^ | |
| - %weight.13 : Tensor = prim::GetAttr[name="weight"](%_0.21) | |
| ? ^ | |
| + %weight.13 : Tensor = prim::GetAttr[name="weight"](%_0.23) | |
| ? ^ | |
| %model.15 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.15 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.15) | |
| %_0.19 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.15) | |
| %attention.15 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.19) | |
| %output_linear.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.15) | |
| %bias.7 : Tensor = prim::GetAttr[name="bias"](%output_linear.3) | |
| %model.13 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.13 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.13) | |
| %_0.17 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.13) | |
| %attention.13 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.17) | |
| %output_linear.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="output_linear"](%attention.13) | |
| %weight.11 : Tensor = prim::GetAttr[name="weight"](%output_linear.1) | |
| %model.11 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.11 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.11) | |
| %_0.15 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.11) | |
| %attention.11 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.15) | |
| %linear_layers.11 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.11) | |
| %_2.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.11) | |
| %bias.5 : Tensor = prim::GetAttr[name="bias"](%_2.3) | |
| %model.9 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.9 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.9) | |
| %_0.13 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.9) | |
| %attention.9 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.13) | |
| %linear_layers.9 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.9) | |
| %_2.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="2"](%linear_layers.9) | |
| %weight.9 : Tensor = prim::GetAttr[name="weight"](%_2.1) | |
| %model.7 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.7 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.7) | |
| %_0.11 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.7) | |
| %attention.7 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.11) | |
| %linear_layers.7 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.7) | |
| %_1.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.7) | |
| %bias.3 : Tensor = prim::GetAttr[name="bias"](%_1.3) | |
| %model.5 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.5 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.5) | |
| %_0.9 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.5) | |
| %attention.5 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.9) | |
| %linear_layers.5 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.5) | |
| %_1.1 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="1"](%linear_layers.5) | |
| %weight.7 : Tensor = prim::GetAttr[name="weight"](%_1.1) | |
| %model.3 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.3 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.3) | |
| %_0.5 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.3) | |
| %attention.3 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.5) | |
| %linear_layers.3 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.3) | |
| %_0.7 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.3) | |
| %bias.1 : Tensor = prim::GetAttr[name="bias"](%_0.7) | |
| %model.1 : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| %transformer_blocks.1 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="transformer_blocks"](%model.1) | |
| %_0.1 : __torch__.bert_pytorch.model.transformer.TransformerBlock = prim::GetAttr[name="0"](%transformer_blocks.1) | |
| %attention.1 : __torch__.bert_pytorch.model.attention.multi_head.MultiHeadedAttention = prim::GetAttr[name="attention"](%_0.1) | |
| %linear_layers.1 : __torch__.torch.nn.modules.container.ModuleList = prim::GetAttr[name="linear_layers"](%attention.1) | |
| %_0.3 : __torch__.torch.nn.modules.linear.Linear = prim::GetAttr[name="0"](%linear_layers.1) | |
| %weight.5 : Tensor = prim::GetAttr[name="weight"](%_0.3) | |
| %1516 : int = prim::Constant[value=0]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| %1517 : Tensor = aten::gt(%tokens, %1516) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| %1518 : int = prim::Constant[value=1]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| %1519 : Tensor = aten::unsqueeze(%1517, %1518) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| %1520 : int = prim::Constant[value=1]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| %1521 : int = aten::size(%tokens, %1520) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| %1522 : Tensor = prim::NumToTensor(%1521) | |
| %1523 : int = aten::Int(%1522) | |
| %1524 : int = prim::Constant[value=1]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| %1525 : int = prim::Constant[value=1]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| %1526 : int[] = prim::ListConstruct(%1524, %1523, %1525) | |
| %1527 : Tensor = aten::repeat(%1519, %1526) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| %1528 : int = prim::Constant[value=1]() # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| - %mask : Tensor = aten::unsqueeze(%1527, %1528) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| + %mask.1 : Tensor = aten::unsqueeze(%1527, %1528) # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/bert.py:40:0 | |
| ? ++ | |
| %1567 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.embedding/__module.model.embedding.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1568 : bool = prim::Constant[value=1](), scope: __module.model.embedding/__module.model.embedding.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1569 : int = prim::Constant[value=1](), scope: __module.model.embedding/__module.model.embedding.position # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/position.py:25:0 | |
| %1570 : int = prim::Constant[value=9223372036854775807](), scope: __module.model.embedding/__module.model.embedding.position # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/position.py:25:0 | |
| %1571 : int = prim::Constant[value=0](), scope: __module.model.embedding/__module.model.embedding.token # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:2148:0 | |
| %1572 : bool = prim::Constant[value=0](), scope: __module.model.embedding/__module.model.embedding.token # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:2148:0 | |
| %dropout.1 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%embedding) | |
| %segment : __torch__.bert_pytorch.model.embedding.segment.SegmentEmbedding = prim::GetAttr[name="segment"](%embedding) | |
| %position : __torch__.bert_pytorch.model.embedding.position.PositionalEmbedding = prim::GetAttr[name="position"](%embedding) | |
| %token : __torch__.bert_pytorch.model.embedding.token.TokenEmbedding = prim::GetAttr[name="token"](%embedding) | |
| %weight.101 : Tensor = prim::GetAttr[name="weight"](%token) | |
| %1578 : Tensor = aten::embedding(%weight.101, %tokens, %1571, %1572, %1572), scope: __module.model.embedding/__module.model.embedding.token # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:2148:0 | |
| %pe : Tensor = prim::GetAttr[name="pe"](%position) | |
| %1580 : int = aten::size(%tokens, %1569), scope: __module.model.embedding/__module.model.embedding.position # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/position.py:25:0 | |
| %1581 : Tensor = aten::slice(%pe, %1571, %1571, %1570, %1569), scope: __module.model.embedding/__module.model.embedding.position # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/position.py:25:0 | |
| %1582 : Tensor = aten::slice(%1581, %1569, %1571, %1580, %1569), scope: __module.model.embedding/__module.model.embedding.position # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/position.py:25:0 | |
| %1583 : Tensor = aten::add(%1578, %1582, %1569), scope: __module.model.embedding # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/bert.py:32:0 | |
| %weight.103 : Tensor = prim::GetAttr[name="weight"](%segment) | |
| %1585 : Tensor = aten::embedding(%weight.103, %tokens, %1571, %1572, %1572), scope: __module.model.embedding/__module.model.embedding.segment # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:2148:0 | |
| %input.1 : Tensor = aten::add(%1583, %1585, %1569), scope: __module.model.embedding # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/embedding/bert.py:32:0 | |
| %x.1 : Tensor = aten::dropout(%input.1, %1567, %1568), scope: __module.model.embedding/__module.model.embedding.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1588 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1589 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1590 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1591 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %1592 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention/__module.model.transformer_blocks.0.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1593 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1594 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1595 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1596 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %1597 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1598 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1599 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm | |
| %1600 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1601 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.1 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.1) | |
| %b_2.1 : Tensor = prim::GetAttr[name="b_2"](%norm.1) | |
| %a_2.1 : Tensor = prim::GetAttr[name="a_2"](%norm.1) | |
| %1605 : int[] = prim::ListConstruct(%1597), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm | |
| %mean.1 : Tensor = aten::mean(%x.1, %1605, %1598, %1599), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1607 : int[] = prim::ListConstruct(%1597), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm | |
| %std.1 : Tensor = aten::std(%x.1, %1607, %1598, %1598), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %1609 : Tensor = aten::sub(%x.1, %mean.1, %1600), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1610 : Tensor = aten::mul(%a_2.1, %1609), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1611 : Tensor = aten::add(%std.1, %1601, %1600), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1612 : Tensor = aten::div(%1610, %1611), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.1 : Tensor = aten::add(%1612, %b_2.1, %1600), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1614 : int = aten::size(%query.1, %1596), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %1615 : Tensor = aten::linear(%query.1, %weight.5, %bias.1), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1616 : int[] = prim::ListConstruct(%1614, %1597, %1595, %1594), scope: __module.model.transformer_blocks.0.input_sublayer | |
| %1617 : Tensor = aten::view(%1615, %1616), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.3 : Tensor = aten::transpose(%1617, %1600, %1593), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1619 : Tensor = aten::linear(%query.1, %weight.7, %bias.3), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1620 : int[] = prim::ListConstruct(%1614, %1597, %1595, %1594), scope: __module.model.transformer_blocks.0.input_sublayer | |
| %1621 : Tensor = aten::view(%1619, %1620), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.1 : Tensor = aten::transpose(%1621, %1600, %1593), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1623 : Tensor = aten::linear(%query.1, %weight.9, %bias.5), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1624 : int[] = prim::ListConstruct(%1614, %1597, %1595, %1594), scope: __module.model.transformer_blocks.0.input_sublayer | |
| %1625 : Tensor = aten::view(%1623, %1624), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.1 : Tensor = aten::transpose(%1625, %1600, %1593), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1627 : Tensor = aten::transpose(%key.1, %1589, %1597), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1628 : Tensor = aten::matmul(%query.3, %1627), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.1 : Tensor = aten::div(%1628, %1590), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %1630 : Tensor = aten::eq(%mask, %1596), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %1630 : Tensor = aten::eq(%mask.1, %1596), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ? ++ | |
| %input.3 : Tensor = aten::masked_fill(%scores.1, %1630, %1591), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.5 : Tensor = aten::softmax(%input.3, %1597, %1599), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.1 : Tensor = aten::dropout(%input.5, %1592, %1598), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention/__module.model.transformer_blocks.0.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.3 : Tensor = aten::matmul(%p_attn.1, %value.1), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %1635 : Tensor = aten::transpose(%x.3, %1600, %1593), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1636 : Tensor = aten::contiguous(%1635, %1596), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1637 : int[] = prim::ListConstruct(%1614, %1597, %1588), scope: __module.model.transformer_blocks.0.input_sublayer | |
| %input.7 : Tensor = aten::view(%1636, %1637), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.9 : Tensor = aten::linear(%input.7, %weight.11, %bias.7), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1640 : Tensor = aten::dropout(%input.9, %1592, %1598), scope: __module.model.transformer_blocks.0.input_sublayer/__module.model.transformer_blocks.0.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.5 : Tensor = aten::add(%x.1, %1640, %1600), scope: __module.model.transformer_blocks.0.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %1642 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1643 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %1644 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1645 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1646 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm | |
| %1647 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1648 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.9 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.1) | |
| %norm.3 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.1) | |
| %b_2.3 : Tensor = prim::GetAttr[name="b_2"](%norm.3) | |
| %a_2.3 : Tensor = prim::GetAttr[name="a_2"](%norm.3) | |
| %1653 : int[] = prim::ListConstruct(%1644), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm | |
| %mean.3 : Tensor = aten::mean(%x.5, %1653, %1645, %1646), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1655 : int[] = prim::ListConstruct(%1644), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm | |
| %std.3 : Tensor = aten::std(%x.5, %1655, %1645, %1645), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %1657 : Tensor = aten::sub(%x.5, %mean.3, %1647), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1658 : Tensor = aten::mul(%a_2.3, %1657), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1659 : Tensor = aten::add(%std.3, %1648, %1647), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1660 : Tensor = aten::div(%1658, %1659), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.11 : Tensor = aten::add(%1660, %b_2.3, %1647), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.97 : Tensor = prim::GetAttr[name="bias"](%w_1.1) | |
| %weight.105 : Tensor = prim::GetAttr[name="weight"](%w_1.1) | |
| %input.13 : Tensor = aten::linear(%input.11, %weight.105, %bias.97), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.15 : Tensor = aten::gelu(%input.13, %1643), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.17 : Tensor = aten::dropout(%input.15, %1642, %1645), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.99 : Tensor = prim::GetAttr[name="bias"](%w_2.1) | |
| %weight.107 : Tensor = prim::GetAttr[name="weight"](%w_2.1) | |
| %input.19 : Tensor = aten::linear(%input.17, %weight.107, %bias.99), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1670 : Tensor = aten::dropout(%input.19, %1642, %1645), scope: __module.model.transformer_blocks.0.output_sublayer/__module.model.transformer_blocks.0.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.21 : Tensor = aten::add(%x.5, %1670, %1647), scope: __module.model.transformer_blocks.0.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %1672 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.0.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1673 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.0.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.7 : Tensor = aten::dropout(%input.21, %1673, %1672), scope: __module.model.transformer_blocks.0.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1675 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1676 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1677 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1678 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %1679 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention/__module.model.transformer_blocks.1.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1680 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1681 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1682 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1683 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %1684 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1685 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1686 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm | |
| %1687 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1688 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.5 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.3) | |
| %b_2.5 : Tensor = prim::GetAttr[name="b_2"](%norm.5) | |
| %a_2.5 : Tensor = prim::GetAttr[name="a_2"](%norm.5) | |
| %1692 : int[] = prim::ListConstruct(%1684), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm | |
| %mean.5 : Tensor = aten::mean(%x.7, %1692, %1685, %1686), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1694 : int[] = prim::ListConstruct(%1684), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm | |
| %std.5 : Tensor = aten::std(%x.7, %1694, %1685, %1685), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %1696 : Tensor = aten::sub(%x.7, %mean.5, %1687), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1697 : Tensor = aten::mul(%a_2.5, %1696), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1698 : Tensor = aten::add(%std.5, %1688, %1687), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1699 : Tensor = aten::div(%1697, %1698), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.5 : Tensor = aten::add(%1699, %b_2.5, %1687), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1701 : int = aten::size(%query.5, %1683), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %1702 : Tensor = aten::linear(%query.5, %weight.13, %bias.9), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1703 : int[] = prim::ListConstruct(%1701, %1684, %1682, %1681), scope: __module.model.transformer_blocks.1.input_sublayer | |
| %1704 : Tensor = aten::view(%1702, %1703), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.7 : Tensor = aten::transpose(%1704, %1687, %1680), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1706 : Tensor = aten::linear(%query.5, %weight.15, %bias.11), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1707 : int[] = prim::ListConstruct(%1701, %1684, %1682, %1681), scope: __module.model.transformer_blocks.1.input_sublayer | |
| %1708 : Tensor = aten::view(%1706, %1707), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.3 : Tensor = aten::transpose(%1708, %1687, %1680), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1710 : Tensor = aten::linear(%query.5, %weight.17, %bias.13), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1711 : int[] = prim::ListConstruct(%1701, %1684, %1682, %1681), scope: __module.model.transformer_blocks.1.input_sublayer | |
| %1712 : Tensor = aten::view(%1710, %1711), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.3 : Tensor = aten::transpose(%1712, %1687, %1680), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1714 : Tensor = aten::transpose(%key.3, %1676, %1684), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1715 : Tensor = aten::matmul(%query.7, %1714), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.3 : Tensor = aten::div(%1715, %1677), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %1717 : Tensor = aten::eq(%mask, %1683), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %1717 : Tensor = aten::eq(%mask.1, %1683), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ? ++ | |
| %input.23 : Tensor = aten::masked_fill(%scores.3, %1717, %1678), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.25 : Tensor = aten::softmax(%input.23, %1684, %1686), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.3 : Tensor = aten::dropout(%input.25, %1679, %1685), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention/__module.model.transformer_blocks.1.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.9 : Tensor = aten::matmul(%p_attn.3, %value.3), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %1722 : Tensor = aten::transpose(%x.9, %1687, %1680), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1723 : Tensor = aten::contiguous(%1722, %1683), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1724 : int[] = prim::ListConstruct(%1701, %1684, %1675), scope: __module.model.transformer_blocks.1.input_sublayer | |
| %input.27 : Tensor = aten::view(%1723, %1724), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.29 : Tensor = aten::linear(%input.27, %weight.19, %bias.15), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1727 : Tensor = aten::dropout(%input.29, %1679, %1685), scope: __module.model.transformer_blocks.1.input_sublayer/__module.model.transformer_blocks.1.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.11 : Tensor = aten::add(%x.7, %1727, %1687), scope: __module.model.transformer_blocks.1.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %1729 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1730 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %1731 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1732 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1733 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm | |
| %1734 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1735 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.21 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.3) | |
| %norm.7 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.3) | |
| %b_2.7 : Tensor = prim::GetAttr[name="b_2"](%norm.7) | |
| %a_2.7 : Tensor = prim::GetAttr[name="a_2"](%norm.7) | |
| %1740 : int[] = prim::ListConstruct(%1731), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm | |
| %mean.7 : Tensor = aten::mean(%x.11, %1740, %1732, %1733), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1742 : int[] = prim::ListConstruct(%1731), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm | |
| %std.7 : Tensor = aten::std(%x.11, %1742, %1732, %1732), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %1744 : Tensor = aten::sub(%x.11, %mean.7, %1734), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1745 : Tensor = aten::mul(%a_2.7, %1744), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1746 : Tensor = aten::add(%std.7, %1735, %1734), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1747 : Tensor = aten::div(%1745, %1746), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.31 : Tensor = aten::add(%1747, %b_2.7, %1734), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.101 : Tensor = prim::GetAttr[name="bias"](%w_1.3) | |
| %weight.109 : Tensor = prim::GetAttr[name="weight"](%w_1.3) | |
| %input.33 : Tensor = aten::linear(%input.31, %weight.109, %bias.101), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.35 : Tensor = aten::gelu(%input.33, %1730), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.37 : Tensor = aten::dropout(%input.35, %1729, %1732), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.103 : Tensor = prim::GetAttr[name="bias"](%w_2.3) | |
| %weight.111 : Tensor = prim::GetAttr[name="weight"](%w_2.3) | |
| %input.39 : Tensor = aten::linear(%input.37, %weight.111, %bias.103), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1757 : Tensor = aten::dropout(%input.39, %1729, %1732), scope: __module.model.transformer_blocks.1.output_sublayer/__module.model.transformer_blocks.1.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.41 : Tensor = aten::add(%x.11, %1757, %1734), scope: __module.model.transformer_blocks.1.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %1759 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.1.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1760 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.1.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.13 : Tensor = aten::dropout(%input.41, %1760, %1759), scope: __module.model.transformer_blocks.1.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
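Each transformer block in the diff lowers the same few lines of bert_pytorch source, so the blocks below read identically up to value numbering. A sketch of the attention function these IR nodes trace back to, reconstructed from the single.py file/line references and the constants visible in the log (div by {8} = sqrt(64), masked_fill with -1e9, softmax over dim -1, dropout p=0.1); the argument names are assumptions:

    import math
    import torch
    import torch.nn.functional as F

    def attention(query, key, value, mask=None, dropout=None):
        # single.py:15 -> aten::transpose(key, -2, -1), aten::matmul, aten::div
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
        if mask is not None:
            # single.py:27 -> aten::eq(%mask, 0) feeding aten::masked_fill(..., -1e9);
            # this is the line where the two traced graphs diverge (%mask vs %mask.1)
            scores = scores.masked_fill(mask == 0, -1e9)
        # functional.py:1783 -> aten::softmax(..., -1)
        p_attn = F.softmax(scores, dim=-1)
        if dropout is not None:
            p_attn = dropout(p_attn)  # aten::dropout, p=0.1, train=1
        # single.py:33 -> aten::matmul(%p_attn, %value)
        return torch.matmul(p_attn, value), p_attn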
| %1762 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1763 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1764 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1765 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %1766 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention/__module.model.transformer_blocks.2.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1767 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1768 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1769 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1770 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %1771 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1772 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1773 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm | |
| %1774 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1775 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.9 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.5) | |
| %b_2.9 : Tensor = prim::GetAttr[name="b_2"](%norm.9) | |
| %a_2.9 : Tensor = prim::GetAttr[name="a_2"](%norm.9) | |
| %1779 : int[] = prim::ListConstruct(%1771), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm | |
| %mean.9 : Tensor = aten::mean(%x.13, %1779, %1772, %1773), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1781 : int[] = prim::ListConstruct(%1771), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm | |
| %std.9 : Tensor = aten::std(%x.13, %1781, %1772, %1772), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %1783 : Tensor = aten::sub(%x.13, %mean.9, %1774), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1784 : Tensor = aten::mul(%a_2.9, %1783), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1785 : Tensor = aten::add(%std.9, %1775, %1774), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1786 : Tensor = aten::div(%1784, %1785), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.9 : Tensor = aten::add(%1786, %b_2.9, %1774), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1788 : int = aten::size(%query.9, %1770), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %1789 : Tensor = aten::linear(%query.9, %weight.21, %bias.17), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1790 : int[] = prim::ListConstruct(%1788, %1771, %1769, %1768), scope: __module.model.transformer_blocks.2.input_sublayer | |
| %1791 : Tensor = aten::view(%1789, %1790), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.11 : Tensor = aten::transpose(%1791, %1774, %1767), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1793 : Tensor = aten::linear(%query.9, %weight.23, %bias.19), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1794 : int[] = prim::ListConstruct(%1788, %1771, %1769, %1768), scope: __module.model.transformer_blocks.2.input_sublayer | |
| %1795 : Tensor = aten::view(%1793, %1794), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.5 : Tensor = aten::transpose(%1795, %1774, %1767), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1797 : Tensor = aten::linear(%query.9, %weight.25, %bias.21), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1798 : int[] = prim::ListConstruct(%1788, %1771, %1769, %1768), scope: __module.model.transformer_blocks.2.input_sublayer | |
| %1799 : Tensor = aten::view(%1797, %1798), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.5 : Tensor = aten::transpose(%1799, %1774, %1767), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1801 : Tensor = aten::transpose(%key.5, %1763, %1771), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1802 : Tensor = aten::matmul(%query.11, %1801), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.5 : Tensor = aten::div(%1802, %1764), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %1804 : Tensor = aten::eq(%mask, %1770), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %1804 : Tensor = aten::eq(%mask.1, %1770), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ?                                ++ | |
| %input.43 : Tensor = aten::masked_fill(%scores.5, %1804, %1765), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.45 : Tensor = aten::softmax(%input.43, %1771, %1773), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.5 : Tensor = aten::dropout(%input.45, %1766, %1772), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention/__module.model.transformer_blocks.2.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.15 : Tensor = aten::matmul(%p_attn.5, %value.5), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %1809 : Tensor = aten::transpose(%x.15, %1774, %1767), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1810 : Tensor = aten::contiguous(%1809, %1770), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1811 : int[] = prim::ListConstruct(%1788, %1771, %1762), scope: __module.model.transformer_blocks.2.input_sublayer | |
| %input.47 : Tensor = aten::view(%1810, %1811), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.49 : Tensor = aten::linear(%input.47, %weight.27, %bias.23), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1814 : Tensor = aten::dropout(%input.49, %1766, %1772), scope: __module.model.transformer_blocks.2.input_sublayer/__module.model.transformer_blocks.2.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.17 : Tensor = aten::add(%x.13, %1814, %1774), scope: __module.model.transformer_blocks.2.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %1816 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1817 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %1818 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1819 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1820 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm | |
| %1821 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1822 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.33 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.5) | |
| %norm.11 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.5) | |
| %b_2.11 : Tensor = prim::GetAttr[name="b_2"](%norm.11) | |
| %a_2.11 : Tensor = prim::GetAttr[name="a_2"](%norm.11) | |
| %1827 : int[] = prim::ListConstruct(%1818), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm | |
| %mean.11 : Tensor = aten::mean(%x.17, %1827, %1819, %1820), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1829 : int[] = prim::ListConstruct(%1818), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm | |
| %std.11 : Tensor = aten::std(%x.17, %1829, %1819, %1819), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %1831 : Tensor = aten::sub(%x.17, %mean.11, %1821), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1832 : Tensor = aten::mul(%a_2.11, %1831), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1833 : Tensor = aten::add(%std.11, %1822, %1821), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1834 : Tensor = aten::div(%1832, %1833), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.51 : Tensor = aten::add(%1834, %b_2.11, %1821), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.105 : Tensor = prim::GetAttr[name="bias"](%w_1.5) | |
| %weight.113 : Tensor = prim::GetAttr[name="weight"](%w_1.5) | |
| %input.53 : Tensor = aten::linear(%input.51, %weight.113, %bias.105), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.55 : Tensor = aten::gelu(%input.53, %1817), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.57 : Tensor = aten::dropout(%input.55, %1816, %1819), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.107 : Tensor = prim::GetAttr[name="bias"](%w_2.5) | |
| %weight.115 : Tensor = prim::GetAttr[name="weight"](%w_2.5) | |
| %input.59 : Tensor = aten::linear(%input.57, %weight.115, %bias.107), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1844 : Tensor = aten::dropout(%input.59, %1816, %1819), scope: __module.model.transformer_blocks.2.output_sublayer/__module.model.transformer_blocks.2.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.61 : Tensor = aten::add(%x.17, %1844, %1821), scope: __module.model.transformer_blocks.2.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %1846 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.2.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1847 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.2.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.19 : Tensor = aten::dropout(%input.61, %1847, %1846), scope: __module.model.transformer_blocks.2.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1849 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1850 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1851 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1852 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %1853 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention/__module.model.transformer_blocks.3.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1854 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1855 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1856 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1857 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %1858 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1859 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1860 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm | |
| %1861 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1862 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.13 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.7) | |
| %b_2.13 : Tensor = prim::GetAttr[name="b_2"](%norm.13) | |
| %a_2.13 : Tensor = prim::GetAttr[name="a_2"](%norm.13) | |
| %1866 : int[] = prim::ListConstruct(%1858), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm | |
| %mean.13 : Tensor = aten::mean(%x.19, %1866, %1859, %1860), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1868 : int[] = prim::ListConstruct(%1858), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm | |
| %std.13 : Tensor = aten::std(%x.19, %1868, %1859, %1859), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %1870 : Tensor = aten::sub(%x.19, %mean.13, %1861), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1871 : Tensor = aten::mul(%a_2.13, %1870), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1872 : Tensor = aten::add(%std.13, %1862, %1861), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1873 : Tensor = aten::div(%1871, %1872), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.13 : Tensor = aten::add(%1873, %b_2.13, %1861), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1875 : int = aten::size(%query.13, %1857), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %1876 : Tensor = aten::linear(%query.13, %weight.29, %bias.25), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1877 : int[] = prim::ListConstruct(%1875, %1858, %1856, %1855), scope: __module.model.transformer_blocks.3.input_sublayer | |
| %1878 : Tensor = aten::view(%1876, %1877), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.15 : Tensor = aten::transpose(%1878, %1861, %1854), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1880 : Tensor = aten::linear(%query.13, %weight.31, %bias.27), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1881 : int[] = prim::ListConstruct(%1875, %1858, %1856, %1855), scope: __module.model.transformer_blocks.3.input_sublayer | |
| %1882 : Tensor = aten::view(%1880, %1881), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.7 : Tensor = aten::transpose(%1882, %1861, %1854), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1884 : Tensor = aten::linear(%query.13, %weight.33, %bias.29), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1885 : int[] = prim::ListConstruct(%1875, %1858, %1856, %1855), scope: __module.model.transformer_blocks.3.input_sublayer | |
| %1886 : Tensor = aten::view(%1884, %1885), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.7 : Tensor = aten::transpose(%1886, %1861, %1854), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1888 : Tensor = aten::transpose(%key.7, %1850, %1858), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1889 : Tensor = aten::matmul(%query.15, %1888), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.7 : Tensor = aten::div(%1889, %1851), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %1891 : Tensor = aten::eq(%mask, %1857), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %1891 : Tensor = aten::eq(%mask.1, %1857), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ?                                ++ | |
| %input.63 : Tensor = aten::masked_fill(%scores.7, %1891, %1852), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.65 : Tensor = aten::softmax(%input.63, %1858, %1860), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.7 : Tensor = aten::dropout(%input.65, %1853, %1859), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention/__module.model.transformer_blocks.3.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.21 : Tensor = aten::matmul(%p_attn.7, %value.7), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %1896 : Tensor = aten::transpose(%x.21, %1861, %1854), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1897 : Tensor = aten::contiguous(%1896, %1857), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1898 : int[] = prim::ListConstruct(%1875, %1858, %1849), scope: __module.model.transformer_blocks.3.input_sublayer | |
| %input.67 : Tensor = aten::view(%1897, %1898), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.69 : Tensor = aten::linear(%input.67, %weight.35, %bias.31), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1901 : Tensor = aten::dropout(%input.69, %1853, %1859), scope: __module.model.transformer_blocks.3.input_sublayer/__module.model.transformer_blocks.3.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.23 : Tensor = aten::add(%x.19, %1901, %1861), scope: __module.model.transformer_blocks.3.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %1903 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1904 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %1905 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1906 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1907 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm | |
| %1908 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1909 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.45 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.7) | |
| %norm.15 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.7) | |
| %b_2.15 : Tensor = prim::GetAttr[name="b_2"](%norm.15) | |
| %a_2.15 : Tensor = prim::GetAttr[name="a_2"](%norm.15) | |
| %1914 : int[] = prim::ListConstruct(%1905), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm | |
| %mean.15 : Tensor = aten::mean(%x.23, %1914, %1906, %1907), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1916 : int[] = prim::ListConstruct(%1905), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm | |
| %std.15 : Tensor = aten::std(%x.23, %1916, %1906, %1906), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %1918 : Tensor = aten::sub(%x.23, %mean.15, %1908), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1919 : Tensor = aten::mul(%a_2.15, %1918), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1920 : Tensor = aten::add(%std.15, %1909, %1908), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1921 : Tensor = aten::div(%1919, %1920), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.71 : Tensor = aten::add(%1921, %b_2.15, %1908), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.109 : Tensor = prim::GetAttr[name="bias"](%w_1.7) | |
| %weight.117 : Tensor = prim::GetAttr[name="weight"](%w_1.7) | |
| %input.73 : Tensor = aten::linear(%input.71, %weight.117, %bias.109), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.75 : Tensor = aten::gelu(%input.73, %1904), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.77 : Tensor = aten::dropout(%input.75, %1903, %1906), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.111 : Tensor = prim::GetAttr[name="bias"](%w_2.7) | |
| %weight.119 : Tensor = prim::GetAttr[name="weight"](%w_2.7) | |
| %input.79 : Tensor = aten::linear(%input.77, %weight.119, %bias.111), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1931 : Tensor = aten::dropout(%input.79, %1903, %1906), scope: __module.model.transformer_blocks.3.output_sublayer/__module.model.transformer_blocks.3.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.81 : Tensor = aten::add(%x.23, %1931, %1908), scope: __module.model.transformer_blocks.3.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %1933 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.3.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1934 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.3.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.25 : Tensor = aten::dropout(%input.81, %1934, %1933), scope: __module.model.transformer_blocks.3.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
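Three blocks in, the pattern is clear: the two traced graphs are identical except for the `%mask` rename and the duplicated `prim::GetAttr` chains, while every `aten::dropout` runs with train=1 and p=0.1, so repeated trace invocations cannot agree numerically; this is what `_check_trace` rejects. A minimal sketch of two common workarounds, shown on a toy dropout module rather than the full BERT from the traceback (`Noisy`, `m`, and `x` are illustrative names, not from the log):

    import torch

    class Noisy(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.drop = torch.nn.Dropout(p=0.1)

        def forward(self, x):
            return self.drop(x)

    m = Noisy()
    x = torch.randn(2, 4)

    # In train mode this can raise TracingCheckError, as in the log
    # above, because dropout makes the checked invocations disagree:
    #   torch.jit.trace_module(m, {"forward": x})

    # Option 1: trace in eval mode, so Dropout is the identity and
    # the sanity check passes.
    m.eval()
    traced_eval = torch.jit.trace_module(m, {"forward": x})

    # Option 2: keep train-mode semantics but skip the check (this
    # silences the error without removing the nondeterminism).
    m.train()
    traced_train = torch.jit.trace_module(m, {"forward": x}, check_trace=False)

Applied to the failing example, the analogous change would be calling model.eval() (or threading check_trace=False through shark_jit_trace) before torch.jit.trace_module; whether SHARK exposes such an option is not something this log shows.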
| %1936 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1937 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1938 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1939 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %1940 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention/__module.model.transformer_blocks.4.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1941 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1942 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1943 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1944 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %1945 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1946 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1947 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm | |
| %1948 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1949 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.17 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.9) | |
| %b_2.17 : Tensor = prim::GetAttr[name="b_2"](%norm.17) | |
| %a_2.17 : Tensor = prim::GetAttr[name="a_2"](%norm.17) | |
| %1953 : int[] = prim::ListConstruct(%1945), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm | |
| %mean.17 : Tensor = aten::mean(%x.25, %1953, %1946, %1947), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1955 : int[] = prim::ListConstruct(%1945), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm | |
| %std.17 : Tensor = aten::std(%x.25, %1955, %1946, %1946), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %1957 : Tensor = aten::sub(%x.25, %mean.17, %1948), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1958 : Tensor = aten::mul(%a_2.17, %1957), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1959 : Tensor = aten::add(%std.17, %1949, %1948), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1960 : Tensor = aten::div(%1958, %1959), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.17 : Tensor = aten::add(%1960, %b_2.17, %1948), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1962 : int = aten::size(%query.17, %1944), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %1963 : Tensor = aten::linear(%query.17, %weight.37, %bias.33), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1964 : int[] = prim::ListConstruct(%1962, %1945, %1943, %1942), scope: __module.model.transformer_blocks.4.input_sublayer | |
| %1965 : Tensor = aten::view(%1963, %1964), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.19 : Tensor = aten::transpose(%1965, %1948, %1941), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1967 : Tensor = aten::linear(%query.17, %weight.39, %bias.35), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1968 : int[] = prim::ListConstruct(%1962, %1945, %1943, %1942), scope: __module.model.transformer_blocks.4.input_sublayer | |
| %1969 : Tensor = aten::view(%1967, %1968), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.9 : Tensor = aten::transpose(%1969, %1948, %1941), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1971 : Tensor = aten::linear(%query.17, %weight.41, %bias.37), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %1972 : int[] = prim::ListConstruct(%1962, %1945, %1943, %1942), scope: __module.model.transformer_blocks.4.input_sublayer | |
| %1973 : Tensor = aten::view(%1971, %1972), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.9 : Tensor = aten::transpose(%1973, %1948, %1941), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %1975 : Tensor = aten::transpose(%key.9, %1937, %1945), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %1976 : Tensor = aten::matmul(%query.19, %1975), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.9 : Tensor = aten::div(%1976, %1938), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %1978 : Tensor = aten::eq(%mask, %1944), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %1978 : Tensor = aten::eq(%mask.1, %1944), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ? ++ | |
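
The hunk above is the whole per-block divergence: the two recorded graphs bind the attention mask under different debug names, %mask versus %mask.1, and the `? ++` line is difflib pointing at the two added characters. The ops around it (aten::div by the {8} constant, i.e. sqrt(d_k) = sqrt(64), aten::eq against 0, aten::masked_fill with -1e9, softmax, dropout, matmul) trace back to bert_pytorch's scaled dot-product attention at the single.py lines cited in the trailing comments. A minimal sketch reconstructed from that op sequence, not copied verbatim from the installed package:

```python
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class Attention(nn.Module):
    """Scaled dot-product attention, matching the traced op sequence."""

    def forward(self, query, key, value, mask=None, dropout=None):
        # single.py:15 -> aten::transpose(-2, -1), aten::matmul, aten::div by sqrt(64) = 8
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
        if mask is not None:
            # single.py:27 -> aten::eq(%mask, 0) then aten::masked_fill(..., -1e9)
            scores = scores.masked_fill(mask == 0, -1e9)
        p_attn = F.softmax(scores, dim=-1)  # aten::softmax over the last dim
        if dropout is not None:
            p_attn = dropout(p_attn)        # aten::dropout with p = 0.1
        return torch.matmul(p_attn, value), p_attn  # single.py:33
```
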
| %input.83 : Tensor = aten::masked_fill(%scores.9, %1978, %1939), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.85 : Tensor = aten::softmax(%input.83, %1945, %1947), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.9 : Tensor = aten::dropout(%input.85, %1940, %1946), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention/__module.model.transformer_blocks.4.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.27 : Tensor = aten::matmul(%p_attn.9, %value.9), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %1983 : Tensor = aten::transpose(%x.27, %1948, %1941), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1984 : Tensor = aten::contiguous(%1983, %1944), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %1985 : int[] = prim::ListConstruct(%1962, %1945, %1936), scope: __module.model.transformer_blocks.4.input_sublayer | |
| %input.87 : Tensor = aten::view(%1984, %1985), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.89 : Tensor = aten::linear(%input.87, %weight.43, %bias.39), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
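
The reshape plumbing in this block (aten::size on dim 0, three aten::linear calls from linear_layers, aten::view to [batch, -1, 12, 64], aten::transpose(1, 2), then the inverse transpose/contiguous/view back to 768 and output_linear) is a standard multi-head split with h = 12 heads of d_k = 64. A sketch under those constants, reusing the Attention class sketched above:

```python
class MultiHeadedAttention(nn.Module):
    """h = 12, d_k = 64, d_model = 768, per the constants in the traced graph."""

    def __init__(self, h=12, d_model=768, dropout=0.1):
        super().__init__()
        assert d_model % h == 0
        self.d_k = d_model // h
        self.h = h
        self.linear_layers = nn.ModuleList(nn.Linear(d_model, d_model) for _ in range(3))
        self.output_linear = nn.Linear(d_model, d_model)
        self.attention = Attention()          # sketched above
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, query, key, value, mask=None):
        batch_size = query.size(0)            # multi_head.py:34 -> aten::size(.., 0)
        # multi_head.py:37 -> linear, view(b, -1, 12, 64), transpose(1, 2) for q/k/v
        query, key, value = [
            layer(x).view(batch_size, -1, self.h, self.d_k).transpose(1, 2)
            for layer, x in zip(self.linear_layers, (query, key, value))
        ]
        x, _ = self.attention(query, key, value, mask=mask, dropout=self.dropout)
        # multi_head.py:44 -> transpose(1, 2), contiguous, view(b, -1, 768)
        x = x.transpose(1, 2).contiguous().view(batch_size, -1, self.h * self.d_k)
        return self.output_linear(x)
```
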
| %1988 : Tensor = aten::dropout(%input.89, %1940, %1946), scope: __module.model.transformer_blocks.4.input_sublayer/__module.model.transformer_blocks.4.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.29 : Tensor = aten::add(%x.25, %1988, %1948), scope: __module.model.transformer_blocks.4.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
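
%x.29 is the residual add at sublayer.py:19: the sublayer input plus a dropout of the attention output, with the layer norm that produced %query.17 applied on the way in. The GetAttr names (norm, dropout) fit a pre-norm residual wrapper; a sketch consistent with them (LayerNorm itself is sketched after the norm ops below):

```python
class SublayerConnection(nn.Module):
    """Pre-norm residual: normalize, run the sublayer, dropout, add back."""

    def __init__(self, size=768, dropout=0.1):
        super().__init__()
        self.norm = LayerNorm(size)        # sketched below
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        # sublayer.py:19 -> aten::add(%x, aten::dropout(sublayer(norm(x))), 1)
        return x + self.dropout(sublayer(self.norm(x)))
```
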
| %1990 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %1991 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %1992 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1993 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %1994 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm | |
| %1995 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %1996 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.57 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.9) | |
| %norm.19 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.9) | |
| %b_2.19 : Tensor = prim::GetAttr[name="b_2"](%norm.19) | |
| %a_2.19 : Tensor = prim::GetAttr[name="a_2"](%norm.19) | |
| %2001 : int[] = prim::ListConstruct(%1992), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm | |
| %mean.19 : Tensor = aten::mean(%x.29, %2001, %1993, %1994), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2003 : int[] = prim::ListConstruct(%1992), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm | |
| %std.19 : Tensor = aten::std(%x.29, %2003, %1993, %1993), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2005 : Tensor = aten::sub(%x.29, %mean.19, %1995), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2006 : Tensor = aten::mul(%a_2.19, %2005), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2007 : Tensor = aten::add(%std.19, %1996, %1995), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2008 : Tensor = aten::div(%2006, %2007), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.91 : Tensor = aten::add(%2008, %b_2.19, %1995), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
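
The mean/std/sub/mul/add/div chain that produced %input.91 (layer_norm.py:15-17, parameters a_2 and b_2, the {1e-06} epsilon constant) is a hand-rolled layer norm over the last dimension. A sketch matching those ops:

```python
class LayerNorm(nn.Module):
    """Affine layer norm over the last dimension, as traced above."""

    def __init__(self, features=768, eps=1e-6):
        super().__init__()
        self.a_2 = nn.Parameter(torch.ones(features))   # scale, GetAttr "a_2"
        self.b_2 = nn.Parameter(torch.zeros(features))  # shift, GetAttr "b_2"
        self.eps = eps                                  # the {1e-06} constant

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)  # layer_norm.py:15 -> aten::mean([-1], True)
        std = x.std(-1, keepdim=True)    # layer_norm.py:16 -> aten::std([-1], True, True)
        # layer_norm.py:17 -> the aten::sub/mul/add/div/add chain above
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2
```
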
| %bias.113 : Tensor = prim::GetAttr[name="bias"](%w_1.9) | |
| %weight.121 : Tensor = prim::GetAttr[name="weight"](%w_1.9) | |
| %input.93 : Tensor = aten::linear(%input.91, %weight.121, %bias.113), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.95 : Tensor = aten::gelu(%input.93, %1991), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.97 : Tensor = aten::dropout(%input.95, %1990, %1993), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.115 : Tensor = prim::GetAttr[name="bias"](%w_2.9) | |
| %weight.123 : Tensor = prim::GetAttr[name="weight"](%w_2.9) | |
| %input.99 : Tensor = aten::linear(%input.97, %weight.123, %bias.115), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2018 : Tensor = aten::dropout(%input.99, %1990, %1993), scope: __module.model.transformer_blocks.4.output_sublayer/__module.model.transformer_blocks.4.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.101 : Tensor = aten::add(%x.29, %2018, %1995), scope: __module.model.transformer_blocks.4.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
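
%input.101 closes the feed-forward sublayer: w_1, gelu, dropout, w_2, then the output sublayer's own dropout and residual add. The hidden width of w_1 is not visible in this slice of the graph, so d_ff below is an assumption (3072 = 4 x 768 is the conventional choice):

```python
class PositionwiseFeedForward(nn.Module):
    """w_1 -> GELU -> dropout -> w_2, matching the traced op order."""

    def __init__(self, d_model=768, d_ff=3072, dropout=0.1):  # d_ff assumed
        super().__init__()
        self.w_1 = nn.Linear(d_model, d_ff)
        self.w_2 = nn.Linear(d_ff, d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = nn.GELU()  # traces as aten::gelu(..., "none")

    def forward(self, x):
        return self.w_2(self.dropout(self.activation(self.w_1(x))))
```
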
| %2020 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.4.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2021 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.4.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.31 : Tensor = aten::dropout(%input.101, %2021, %2020), scope: __module.model.transformer_blocks.4.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
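
%x.31 ends transformer block 4: attention behind input_sublayer (the lambda_module scope names in the IR come from the closure handed to it), feed-forward behind output_sublayer, then a block-level dropout. Tying the sketches above together:

```python
class TransformerBlock(nn.Module):
    def __init__(self, hidden=768, attn_heads=12, dropout=0.1):
        super().__init__()
        self.attention = MultiHeadedAttention(h=attn_heads, d_model=hidden, dropout=dropout)
        self.feed_forward = PositionwiseFeedForward(d_model=hidden, dropout=dropout)
        self.input_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.output_sublayer = SublayerConnection(size=hidden, dropout=dropout)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, mask):
        # The lambda is what appears as "lambda_module" in the trace scopes.
        x = self.input_sublayer(x, lambda _x: self.attention(_x, _x, _x, mask=mask))
        x = self.output_sublayer(x, self.feed_forward)
        return self.dropout(x)  # -> %x.31, feeding transformer_blocks.5
```
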
| %2023 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2024 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2025 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2026 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %2027 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention/__module.model.transformer_blocks.5.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2028 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2029 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2030 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2031 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2032 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2033 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2034 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm | |
| %2035 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2036 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.21 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.11) | |
| %b_2.21 : Tensor = prim::GetAttr[name="b_2"](%norm.21) | |
| %a_2.21 : Tensor = prim::GetAttr[name="a_2"](%norm.21) | |
| %2040 : int[] = prim::ListConstruct(%2032), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm | |
| %mean.21 : Tensor = aten::mean(%x.31, %2040, %2033, %2034), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2042 : int[] = prim::ListConstruct(%2032), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm | |
| %std.21 : Tensor = aten::std(%x.31, %2042, %2033, %2033), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2044 : Tensor = aten::sub(%x.31, %mean.21, %2035), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2045 : Tensor = aten::mul(%a_2.21, %2044), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2046 : Tensor = aten::add(%std.21, %2036, %2035), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2047 : Tensor = aten::div(%2045, %2046), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.21 : Tensor = aten::add(%2047, %b_2.21, %2035), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2049 : int = aten::size(%query.21, %2031), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2050 : Tensor = aten::linear(%query.21, %weight.45, %bias.41), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2051 : int[] = prim::ListConstruct(%2049, %2032, %2030, %2029), scope: __module.model.transformer_blocks.5.input_sublayer | |
| %2052 : Tensor = aten::view(%2050, %2051), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.23 : Tensor = aten::transpose(%2052, %2035, %2028), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2054 : Tensor = aten::linear(%query.21, %weight.47, %bias.43), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2055 : int[] = prim::ListConstruct(%2049, %2032, %2030, %2029), scope: __module.model.transformer_blocks.5.input_sublayer | |
| %2056 : Tensor = aten::view(%2054, %2055), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.11 : Tensor = aten::transpose(%2056, %2035, %2028), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2058 : Tensor = aten::linear(%query.21, %weight.49, %bias.45), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2059 : int[] = prim::ListConstruct(%2049, %2032, %2030, %2029), scope: __module.model.transformer_blocks.5.input_sublayer | |
| %2060 : Tensor = aten::view(%2058, %2059), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.11 : Tensor = aten::transpose(%2060, %2035, %2028), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2062 : Tensor = aten::transpose(%key.11, %2024, %2032), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2063 : Tensor = aten::matmul(%query.23, %2062), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.11 : Tensor = aten::div(%2063, %2025), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %2065 : Tensor = aten::eq(%mask, %2031), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %2065 : Tensor = aten::eq(%mask.1, %2031), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ? ++ | |
| %input.103 : Tensor = aten::masked_fill(%scores.11, %2065, %2026), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.105 : Tensor = aten::softmax(%input.103, %2032, %2034), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.11 : Tensor = aten::dropout(%input.105, %2027, %2033), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention/__module.model.transformer_blocks.5.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.33 : Tensor = aten::matmul(%p_attn.11, %value.11), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %2070 : Tensor = aten::transpose(%x.33, %2035, %2028), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2071 : Tensor = aten::contiguous(%2070, %2031), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2072 : int[] = prim::ListConstruct(%2049, %2032, %2023), scope: __module.model.transformer_blocks.5.input_sublayer | |
| %input.107 : Tensor = aten::view(%2071, %2072), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.109 : Tensor = aten::linear(%input.107, %weight.51, %bias.47), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2075 : Tensor = aten::dropout(%input.109, %2027, %2033), scope: __module.model.transformer_blocks.5.input_sublayer/__module.model.transformer_blocks.5.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.35 : Tensor = aten::add(%x.31, %2075, %2035), scope: __module.model.transformer_blocks.5.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2077 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2078 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %2079 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2080 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2081 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm | |
| %2082 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2083 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.69 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.11) | |
| %norm.23 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.11) | |
| %b_2.23 : Tensor = prim::GetAttr[name="b_2"](%norm.23) | |
| %a_2.23 : Tensor = prim::GetAttr[name="a_2"](%norm.23) | |
| %2088 : int[] = prim::ListConstruct(%2079), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm | |
| %mean.23 : Tensor = aten::mean(%x.35, %2088, %2080, %2081), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2090 : int[] = prim::ListConstruct(%2079), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm | |
| %std.23 : Tensor = aten::std(%x.35, %2090, %2080, %2080), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2092 : Tensor = aten::sub(%x.35, %mean.23, %2082), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2093 : Tensor = aten::mul(%a_2.23, %2092), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2094 : Tensor = aten::add(%std.23, %2083, %2082), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2095 : Tensor = aten::div(%2093, %2094), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.111 : Tensor = aten::add(%2095, %b_2.23, %2082), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.117 : Tensor = prim::GetAttr[name="bias"](%w_1.11) | |
| %weight.125 : Tensor = prim::GetAttr[name="weight"](%w_1.11) | |
| %input.113 : Tensor = aten::linear(%input.111, %weight.125, %bias.117), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.115 : Tensor = aten::gelu(%input.113, %2078), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.117 : Tensor = aten::dropout(%input.115, %2077, %2080), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.119 : Tensor = prim::GetAttr[name="bias"](%w_2.11) | |
| %weight.127 : Tensor = prim::GetAttr[name="weight"](%w_2.11) | |
| %input.119 : Tensor = aten::linear(%input.117, %weight.127, %bias.119), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2105 : Tensor = aten::dropout(%input.119, %2077, %2080), scope: __module.model.transformer_blocks.5.output_sublayer/__module.model.transformer_blocks.5.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.121 : Tensor = aten::add(%x.35, %2105, %2082), scope: __module.model.transformer_blocks.5.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2107 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.5.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2108 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.5.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.37 : Tensor = aten::dropout(%input.121, %2108, %2107), scope: __module.model.transformer_blocks.5.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
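
Block 5 (and every block after it) repeats the same op pattern with only the SSA ids advancing. Note that each aten::dropout here receives a true train flag (e.g. %2033 and %2080 are prim::Constant[value=1] bools), so the module was traced in training mode: identical inputs produce stochastically different outputs, and the checker's numeric comparison cannot pass. A minimal sketch of the usual remedy, with hypothetical names (model, example_inputs):

```python
import torch

# Trace in eval mode so aten::dropout records train=False and the
# traced function is deterministic across invocations.
model.eval()
with torch.no_grad():
    traced = torch.jit.trace_module(model, {"forward": example_inputs})
```
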
| %2110 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2111 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2112 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2113 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %2114 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention/__module.model.transformer_blocks.6.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2115 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2116 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2117 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2118 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2119 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2120 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2121 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm | |
| %2122 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2123 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.25 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.13) | |
| %b_2.25 : Tensor = prim::GetAttr[name="b_2"](%norm.25) | |
| %a_2.25 : Tensor = prim::GetAttr[name="a_2"](%norm.25) | |
| %2127 : int[] = prim::ListConstruct(%2119), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm | |
| %mean.25 : Tensor = aten::mean(%x.37, %2127, %2120, %2121), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2129 : int[] = prim::ListConstruct(%2119), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm | |
| %std.25 : Tensor = aten::std(%x.37, %2129, %2120, %2120), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2131 : Tensor = aten::sub(%x.37, %mean.25, %2122), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2132 : Tensor = aten::mul(%a_2.25, %2131), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2133 : Tensor = aten::add(%std.25, %2123, %2122), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2134 : Tensor = aten::div(%2132, %2133), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.25 : Tensor = aten::add(%2134, %b_2.25, %2122), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2136 : int = aten::size(%query.25, %2118), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2137 : Tensor = aten::linear(%query.25, %weight.53, %bias.49), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2138 : int[] = prim::ListConstruct(%2136, %2119, %2117, %2116), scope: __module.model.transformer_blocks.6.input_sublayer | |
| %2139 : Tensor = aten::view(%2137, %2138), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.27 : Tensor = aten::transpose(%2139, %2122, %2115), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2141 : Tensor = aten::linear(%query.25, %weight.55, %bias.51), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2142 : int[] = prim::ListConstruct(%2136, %2119, %2117, %2116), scope: __module.model.transformer_blocks.6.input_sublayer | |
| %2143 : Tensor = aten::view(%2141, %2142), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.13 : Tensor = aten::transpose(%2143, %2122, %2115), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2145 : Tensor = aten::linear(%query.25, %weight.57, %bias.53), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2146 : int[] = prim::ListConstruct(%2136, %2119, %2117, %2116), scope: __module.model.transformer_blocks.6.input_sublayer | |
| %2147 : Tensor = aten::view(%2145, %2146), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.13 : Tensor = aten::transpose(%2147, %2122, %2115), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2149 : Tensor = aten::transpose(%key.13, %2111, %2119), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2150 : Tensor = aten::matmul(%query.27, %2149), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.13 : Tensor = aten::div(%2150, %2112), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %2152 : Tensor = aten::eq(%mask, %2118), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %2152 : Tensor = aten::eq(%mask.1, %2118), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ? ++ | |
| %input.123 : Tensor = aten::masked_fill(%scores.13, %2152, %2113), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.125 : Tensor = aten::softmax(%input.123, %2119, %2121), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.13 : Tensor = aten::dropout(%input.125, %2114, %2120), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention/__module.model.transformer_blocks.6.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.39 : Tensor = aten::matmul(%p_attn.13, %value.13), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %2157 : Tensor = aten::transpose(%x.39, %2122, %2115), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2158 : Tensor = aten::contiguous(%2157, %2118), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2159 : int[] = prim::ListConstruct(%2136, %2119, %2110), scope: __module.model.transformer_blocks.6.input_sublayer | |
| %input.127 : Tensor = aten::view(%2158, %2159), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.129 : Tensor = aten::linear(%input.127, %weight.59, %bias.55), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2162 : Tensor = aten::dropout(%input.129, %2114, %2120), scope: __module.model.transformer_blocks.6.input_sublayer/__module.model.transformer_blocks.6.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.41 : Tensor = aten::add(%x.37, %2162, %2122), scope: __module.model.transformer_blocks.6.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2164 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2165 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %2166 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2167 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2168 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm | |
| %2169 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2170 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.81 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.13) | |
| %norm.27 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.13) | |
| %b_2.27 : Tensor = prim::GetAttr[name="b_2"](%norm.27) | |
| %a_2.27 : Tensor = prim::GetAttr[name="a_2"](%norm.27) | |
| %2175 : int[] = prim::ListConstruct(%2166), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm | |
| %mean.27 : Tensor = aten::mean(%x.41, %2175, %2167, %2168), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2177 : int[] = prim::ListConstruct(%2166), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm | |
| %std.27 : Tensor = aten::std(%x.41, %2177, %2167, %2167), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2179 : Tensor = aten::sub(%x.41, %mean.27, %2169), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2180 : Tensor = aten::mul(%a_2.27, %2179), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2181 : Tensor = aten::add(%std.27, %2170, %2169), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2182 : Tensor = aten::div(%2180, %2181), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.131 : Tensor = aten::add(%2182, %b_2.27, %2169), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.121 : Tensor = prim::GetAttr[name="bias"](%w_1.13) | |
| %weight.129 : Tensor = prim::GetAttr[name="weight"](%w_1.13) | |
| %input.133 : Tensor = aten::linear(%input.131, %weight.129, %bias.121), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.135 : Tensor = aten::gelu(%input.133, %2165), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.137 : Tensor = aten::dropout(%input.135, %2164, %2167), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.123 : Tensor = prim::GetAttr[name="bias"](%w_2.13) | |
| %weight.131 : Tensor = prim::GetAttr[name="weight"](%w_2.13) | |
| %input.139 : Tensor = aten::linear(%input.137, %weight.131, %bias.123), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2192 : Tensor = aten::dropout(%input.139, %2164, %2167), scope: __module.model.transformer_blocks.6.output_sublayer/__module.model.transformer_blocks.6.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.141 : Tensor = aten::add(%x.41, %2192, %2169), scope: __module.model.transformer_blocks.6.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2194 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.6.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2195 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.6.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.43 : Tensor = aten::dropout(%input.141, %2195, %2194), scope: __module.model.transformer_blocks.6.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
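
Block 7 below opens with the same %mask rename yet again. If the rename and the dropout noise are understood and acceptable, torch.jit.trace_module also accepts check_trace=False, which skips the post-trace sanity comparison entirely; that silences this failure but also hides any real divergence, so eval-mode tracing is the safer route. Sketch with hypothetical names:

```python
import torch

# Skips the post-trace graph/output comparison; use with care.
traced = torch.jit.trace_module(model, {"forward": example_inputs},
                                check_trace=False)
```
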
| %2197 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2198 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2199 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2200 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %2201 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention/__module.model.transformer_blocks.7.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2202 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2203 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2204 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2205 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2206 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2207 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2208 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm | |
| %2209 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2210 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.29 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.15) | |
| %b_2.29 : Tensor = prim::GetAttr[name="b_2"](%norm.29) | |
| %a_2.29 : Tensor = prim::GetAttr[name="a_2"](%norm.29) | |
| %2214 : int[] = prim::ListConstruct(%2206), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm | |
| %mean.29 : Tensor = aten::mean(%x.43, %2214, %2207, %2208), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2216 : int[] = prim::ListConstruct(%2206), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm | |
| %std.29 : Tensor = aten::std(%x.43, %2216, %2207, %2207), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2218 : Tensor = aten::sub(%x.43, %mean.29, %2209), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2219 : Tensor = aten::mul(%a_2.29, %2218), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2220 : Tensor = aten::add(%std.29, %2210, %2209), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2221 : Tensor = aten::div(%2219, %2220), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.29 : Tensor = aten::add(%2221, %b_2.29, %2209), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2223 : int = aten::size(%query.29, %2205), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2224 : Tensor = aten::linear(%query.29, %weight.61, %bias.57), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2225 : int[] = prim::ListConstruct(%2223, %2206, %2204, %2203), scope: __module.model.transformer_blocks.7.input_sublayer | |
| %2226 : Tensor = aten::view(%2224, %2225), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.31 : Tensor = aten::transpose(%2226, %2209, %2202), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2228 : Tensor = aten::linear(%query.29, %weight.63, %bias.59), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2229 : int[] = prim::ListConstruct(%2223, %2206, %2204, %2203), scope: __module.model.transformer_blocks.7.input_sublayer | |
| %2230 : Tensor = aten::view(%2228, %2229), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.15 : Tensor = aten::transpose(%2230, %2209, %2202), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2232 : Tensor = aten::linear(%query.29, %weight.65, %bias.61), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2233 : int[] = prim::ListConstruct(%2223, %2206, %2204, %2203), scope: __module.model.transformer_blocks.7.input_sublayer | |
| %2234 : Tensor = aten::view(%2232, %2233), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.15 : Tensor = aten::transpose(%2234, %2209, %2202), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2236 : Tensor = aten::transpose(%key.15, %2198, %2206), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2237 : Tensor = aten::matmul(%query.31, %2236), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.15 : Tensor = aten::div(%2237, %2199), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %2239 : Tensor = aten::eq(%mask, %2205), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %2239 : Tensor = aten::eq(%mask.1, %2205), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ?                                ++ | |
| %input.143 : Tensor = aten::masked_fill(%scores.15, %2239, %2200), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.145 : Tensor = aten::softmax(%input.143, %2206, %2208), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.15 : Tensor = aten::dropout(%input.145, %2201, %2207), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention/__module.model.transformer_blocks.7.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.45 : Tensor = aten::matmul(%p_attn.15, %value.15), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %2244 : Tensor = aten::transpose(%x.45, %2209, %2202), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2245 : Tensor = aten::contiguous(%2244, %2205), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2246 : int[] = prim::ListConstruct(%2223, %2206, %2197), scope: __module.model.transformer_blocks.7.input_sublayer | |
| %input.147 : Tensor = aten::view(%2245, %2246), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.149 : Tensor = aten::linear(%input.147, %weight.67, %bias.63), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2249 : Tensor = aten::dropout(%input.149, %2201, %2207), scope: __module.model.transformer_blocks.7.input_sublayer/__module.model.transformer_blocks.7.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.47 : Tensor = aten::add(%x.43, %2249, %2209), scope: __module.model.transformer_blocks.7.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2251 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2252 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %2253 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2254 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2255 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm | |
| %2256 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2257 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.93 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.15) | |
| %norm.31 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.15) | |
| %b_2.31 : Tensor = prim::GetAttr[name="b_2"](%norm.31) | |
| %a_2.31 : Tensor = prim::GetAttr[name="a_2"](%norm.31) | |
| %2262 : int[] = prim::ListConstruct(%2253), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm | |
| %mean.31 : Tensor = aten::mean(%x.47, %2262, %2254, %2255), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2264 : int[] = prim::ListConstruct(%2253), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm | |
| %std.31 : Tensor = aten::std(%x.47, %2264, %2254, %2254), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2266 : Tensor = aten::sub(%x.47, %mean.31, %2256), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2267 : Tensor = aten::mul(%a_2.31, %2266), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2268 : Tensor = aten::add(%std.31, %2257, %2256), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2269 : Tensor = aten::div(%2267, %2268), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.151 : Tensor = aten::add(%2269, %b_2.31, %2256), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.125 : Tensor = prim::GetAttr[name="bias"](%w_1.15) | |
| %weight.133 : Tensor = prim::GetAttr[name="weight"](%w_1.15) | |
| %input.153 : Tensor = aten::linear(%input.151, %weight.133, %bias.125), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.155 : Tensor = aten::gelu(%input.153, %2252), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.157 : Tensor = aten::dropout(%input.155, %2251, %2254), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.127 : Tensor = prim::GetAttr[name="bias"](%w_2.15) | |
| %weight.135 : Tensor = prim::GetAttr[name="weight"](%w_2.15) | |
| %input.159 : Tensor = aten::linear(%input.157, %weight.135, %bias.127), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2279 : Tensor = aten::dropout(%input.159, %2251, %2254), scope: __module.model.transformer_blocks.7.output_sublayer/__module.model.transformer_blocks.7.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.161 : Tensor = aten::add(%x.47, %2279, %2256), scope: __module.model.transformer_blocks.7.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2281 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.7.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2282 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.7.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.49 : Tensor = aten::dropout(%input.161, %2282, %2281), scope: __module.model.transformer_blocks.7.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2284 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2285 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2286 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2287 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %2288 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention/__module.model.transformer_blocks.8.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2289 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2290 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2291 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2292 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2293 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2294 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2295 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm | |
| %2296 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2297 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.33 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.17) | |
| %b_2.33 : Tensor = prim::GetAttr[name="b_2"](%norm.33) | |
| %a_2.33 : Tensor = prim::GetAttr[name="a_2"](%norm.33) | |
| %2301 : int[] = prim::ListConstruct(%2293), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm | |
| %mean.33 : Tensor = aten::mean(%x.49, %2301, %2294, %2295), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2303 : int[] = prim::ListConstruct(%2293), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm | |
| %std.33 : Tensor = aten::std(%x.49, %2303, %2294, %2294), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2305 : Tensor = aten::sub(%x.49, %mean.33, %2296), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2306 : Tensor = aten::mul(%a_2.33, %2305), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2307 : Tensor = aten::add(%std.33, %2297, %2296), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2308 : Tensor = aten::div(%2306, %2307), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.33 : Tensor = aten::add(%2308, %b_2.33, %2296), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2310 : int = aten::size(%query.33, %2292), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2311 : Tensor = aten::linear(%query.33, %weight.69, %bias.65), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2312 : int[] = prim::ListConstruct(%2310, %2293, %2291, %2290), scope: __module.model.transformer_blocks.8.input_sublayer | |
| %2313 : Tensor = aten::view(%2311, %2312), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.35 : Tensor = aten::transpose(%2313, %2296, %2289), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2315 : Tensor = aten::linear(%query.33, %weight.71, %bias.67), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2316 : int[] = prim::ListConstruct(%2310, %2293, %2291, %2290), scope: __module.model.transformer_blocks.8.input_sublayer | |
| %2317 : Tensor = aten::view(%2315, %2316), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.17 : Tensor = aten::transpose(%2317, %2296, %2289), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2319 : Tensor = aten::linear(%query.33, %weight.73, %bias.69), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2320 : int[] = prim::ListConstruct(%2310, %2293, %2291, %2290), scope: __module.model.transformer_blocks.8.input_sublayer | |
| %2321 : Tensor = aten::view(%2319, %2320), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.17 : Tensor = aten::transpose(%2321, %2296, %2289), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2323 : Tensor = aten::transpose(%key.17, %2285, %2293), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2324 : Tensor = aten::matmul(%query.35, %2323), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.17 : Tensor = aten::div(%2324, %2286), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %2326 : Tensor = aten::eq(%mask, %2292), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %2326 : Tensor = aten::eq(%mask.1, %2292), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ?                                ++ | |
| %input.163 : Tensor = aten::masked_fill(%scores.17, %2326, %2287), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.165 : Tensor = aten::softmax(%input.163, %2293, %2295), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.17 : Tensor = aten::dropout(%input.165, %2288, %2294), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention/__module.model.transformer_blocks.8.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.51 : Tensor = aten::matmul(%p_attn.17, %value.17), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %2331 : Tensor = aten::transpose(%x.51, %2296, %2289), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2332 : Tensor = aten::contiguous(%2331, %2292), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2333 : int[] = prim::ListConstruct(%2310, %2293, %2284), scope: __module.model.transformer_blocks.8.input_sublayer | |
| %input.167 : Tensor = aten::view(%2332, %2333), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.169 : Tensor = aten::linear(%input.167, %weight.75, %bias.71), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2336 : Tensor = aten::dropout(%input.169, %2288, %2294), scope: __module.model.transformer_blocks.8.input_sublayer/__module.model.transformer_blocks.8.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.53 : Tensor = aten::add(%x.49, %2336, %2296), scope: __module.model.transformer_blocks.8.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2338 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2339 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %2340 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2341 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2342 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm | |
| %2343 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2344 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.105 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.17) | |
| %norm.35 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.17) | |
| %b_2.35 : Tensor = prim::GetAttr[name="b_2"](%norm.35) | |
| %a_2.35 : Tensor = prim::GetAttr[name="a_2"](%norm.35) | |
| %2349 : int[] = prim::ListConstruct(%2340), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm | |
| %mean.35 : Tensor = aten::mean(%x.53, %2349, %2341, %2342), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2351 : int[] = prim::ListConstruct(%2340), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm | |
| %std.35 : Tensor = aten::std(%x.53, %2351, %2341, %2341), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2353 : Tensor = aten::sub(%x.53, %mean.35, %2343), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2354 : Tensor = aten::mul(%a_2.35, %2353), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2355 : Tensor = aten::add(%std.35, %2344, %2343), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2356 : Tensor = aten::div(%2354, %2355), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.171 : Tensor = aten::add(%2356, %b_2.35, %2343), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.129 : Tensor = prim::GetAttr[name="bias"](%w_1.17) | |
| %weight.137 : Tensor = prim::GetAttr[name="weight"](%w_1.17) | |
| %input.173 : Tensor = aten::linear(%input.171, %weight.137, %bias.129), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.175 : Tensor = aten::gelu(%input.173, %2339), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.177 : Tensor = aten::dropout(%input.175, %2338, %2341), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.131 : Tensor = prim::GetAttr[name="bias"](%w_2.17) | |
| %weight.139 : Tensor = prim::GetAttr[name="weight"](%w_2.17) | |
| %input.179 : Tensor = aten::linear(%input.177, %weight.139, %bias.131), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2366 : Tensor = aten::dropout(%input.179, %2338, %2341), scope: __module.model.transformer_blocks.8.output_sublayer/__module.model.transformer_blocks.8.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.181 : Tensor = aten::add(%x.53, %2366, %2343), scope: __module.model.transformer_blocks.8.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2368 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.8.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2369 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.8.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.55 : Tensor = aten::dropout(%input.181, %2369, %2368), scope: __module.model.transformer_blocks.8.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2371 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2372 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2373 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2374 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %2375 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention/__module.model.transformer_blocks.9.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2376 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2377 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2378 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2379 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2380 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2381 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2382 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm | |
| %2383 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2384 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.37 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.19) | |
| %b_2.37 : Tensor = prim::GetAttr[name="b_2"](%norm.37) | |
| %a_2.37 : Tensor = prim::GetAttr[name="a_2"](%norm.37) | |
| %2388 : int[] = prim::ListConstruct(%2380), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm | |
| %mean.37 : Tensor = aten::mean(%x.55, %2388, %2381, %2382), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2390 : int[] = prim::ListConstruct(%2380), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm | |
| %std.37 : Tensor = aten::std(%x.55, %2390, %2381, %2381), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2392 : Tensor = aten::sub(%x.55, %mean.37, %2383), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2393 : Tensor = aten::mul(%a_2.37, %2392), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2394 : Tensor = aten::add(%std.37, %2384, %2383), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2395 : Tensor = aten::div(%2393, %2394), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.37 : Tensor = aten::add(%2395, %b_2.37, %2383), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2397 : int = aten::size(%query.37, %2379), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2398 : Tensor = aten::linear(%query.37, %weight.77, %bias.73), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2399 : int[] = prim::ListConstruct(%2397, %2380, %2378, %2377), scope: __module.model.transformer_blocks.9.input_sublayer | |
| %2400 : Tensor = aten::view(%2398, %2399), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.39 : Tensor = aten::transpose(%2400, %2383, %2376), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2402 : Tensor = aten::linear(%query.37, %weight.79, %bias.75), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2403 : int[] = prim::ListConstruct(%2397, %2380, %2378, %2377), scope: __module.model.transformer_blocks.9.input_sublayer | |
| %2404 : Tensor = aten::view(%2402, %2403), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.19 : Tensor = aten::transpose(%2404, %2383, %2376), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2406 : Tensor = aten::linear(%query.37, %weight.81, %bias.77), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2407 : int[] = prim::ListConstruct(%2397, %2380, %2378, %2377), scope: __module.model.transformer_blocks.9.input_sublayer | |
| %2408 : Tensor = aten::view(%2406, %2407), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.19 : Tensor = aten::transpose(%2408, %2383, %2376), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2410 : Tensor = aten::transpose(%key.19, %2372, %2380), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2411 : Tensor = aten::matmul(%query.39, %2410), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.19 : Tensor = aten::div(%2411, %2373), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %2413 : Tensor = aten::eq(%mask, %2379), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %2413 : Tensor = aten::eq(%mask.1, %2379), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ?                                ++ | |
| %input.183 : Tensor = aten::masked_fill(%scores.19, %2413, %2374), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.185 : Tensor = aten::softmax(%input.183, %2380, %2382), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.19 : Tensor = aten::dropout(%input.185, %2375, %2381), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention/__module.model.transformer_blocks.9.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.57 : Tensor = aten::matmul(%p_attn.19, %value.19), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %2418 : Tensor = aten::transpose(%x.57, %2383, %2376), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2419 : Tensor = aten::contiguous(%2418, %2379), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2420 : int[] = prim::ListConstruct(%2397, %2380, %2371), scope: __module.model.transformer_blocks.9.input_sublayer | |
| %input.187 : Tensor = aten::view(%2419, %2420), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.189 : Tensor = aten::linear(%input.187, %weight.83, %bias.79), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2423 : Tensor = aten::dropout(%input.189, %2375, %2381), scope: __module.model.transformer_blocks.9.input_sublayer/__module.model.transformer_blocks.9.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.59 : Tensor = aten::add(%x.55, %2423, %2383), scope: __module.model.transformer_blocks.9.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2425 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2426 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %2427 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2428 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2429 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm | |
| %2430 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2431 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.117 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.19) | |
| %norm.39 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.19) | |
| %b_2.39 : Tensor = prim::GetAttr[name="b_2"](%norm.39) | |
| %a_2.39 : Tensor = prim::GetAttr[name="a_2"](%norm.39) | |
| %2436 : int[] = prim::ListConstruct(%2427), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm | |
| %mean.39 : Tensor = aten::mean(%x.59, %2436, %2428, %2429), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2438 : int[] = prim::ListConstruct(%2427), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm | |
| %std.39 : Tensor = aten::std(%x.59, %2438, %2428, %2428), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2440 : Tensor = aten::sub(%x.59, %mean.39, %2430), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2441 : Tensor = aten::mul(%a_2.39, %2440), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2442 : Tensor = aten::add(%std.39, %2431, %2430), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2443 : Tensor = aten::div(%2441, %2442), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.191 : Tensor = aten::add(%2443, %b_2.39, %2430), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.133 : Tensor = prim::GetAttr[name="bias"](%w_1.19) | |
| %weight.141 : Tensor = prim::GetAttr[name="weight"](%w_1.19) | |
| %input.193 : Tensor = aten::linear(%input.191, %weight.141, %bias.133), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.195 : Tensor = aten::gelu(%input.193, %2426), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.197 : Tensor = aten::dropout(%input.195, %2425, %2428), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.135 : Tensor = prim::GetAttr[name="bias"](%w_2.19) | |
| %weight.143 : Tensor = prim::GetAttr[name="weight"](%w_2.19) | |
| %input.199 : Tensor = aten::linear(%input.197, %weight.143, %bias.135), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2453 : Tensor = aten::dropout(%input.199, %2425, %2428), scope: __module.model.transformer_blocks.9.output_sublayer/__module.model.transformer_blocks.9.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.201 : Tensor = aten::add(%x.59, %2453, %2430), scope: __module.model.transformer_blocks.9.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2455 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.9.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2456 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.9.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.61 : Tensor = aten::dropout(%input.201, %2456, %2455), scope: __module.model.transformer_blocks.9.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2458 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2459 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2460 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2461 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %2462 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention/__module.model.transformer_blocks.10.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2463 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2464 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2465 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2466 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2467 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2468 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2469 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm | |
| %2470 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2471 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.41 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer.21) | |
| %b_2.41 : Tensor = prim::GetAttr[name="b_2"](%norm.41) | |
| %a_2.41 : Tensor = prim::GetAttr[name="a_2"](%norm.41) | |
| %2475 : int[] = prim::ListConstruct(%2467), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm | |
| %mean.41 : Tensor = aten::mean(%x.61, %2475, %2468, %2469), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2477 : int[] = prim::ListConstruct(%2467), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm | |
| %std.41 : Tensor = aten::std(%x.61, %2477, %2468, %2468), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2479 : Tensor = aten::sub(%x.61, %mean.41, %2470), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2480 : Tensor = aten::mul(%a_2.41, %2479), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2481 : Tensor = aten::add(%std.41, %2471, %2470), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2482 : Tensor = aten::div(%2480, %2481), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.41 : Tensor = aten::add(%2482, %b_2.41, %2470), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2484 : int = aten::size(%query.41, %2466), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2485 : Tensor = aten::linear(%query.41, %weight.85, %bias.81), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2486 : int[] = prim::ListConstruct(%2484, %2467, %2465, %2464), scope: __module.model.transformer_blocks.10.input_sublayer | |
| %2487 : Tensor = aten::view(%2485, %2486), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query.43 : Tensor = aten::transpose(%2487, %2470, %2463), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2489 : Tensor = aten::linear(%query.41, %weight.87, %bias.83), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2490 : int[] = prim::ListConstruct(%2484, %2467, %2465, %2464), scope: __module.model.transformer_blocks.10.input_sublayer | |
| %2491 : Tensor = aten::view(%2489, %2490), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key.21 : Tensor = aten::transpose(%2491, %2470, %2463), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2493 : Tensor = aten::linear(%query.41, %weight.89, %bias.85), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2494 : int[] = prim::ListConstruct(%2484, %2467, %2465, %2464), scope: __module.model.transformer_blocks.10.input_sublayer | |
| %2495 : Tensor = aten::view(%2493, %2494), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value.21 : Tensor = aten::transpose(%2495, %2470, %2463), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2497 : Tensor = aten::transpose(%key.21, %2459, %2467), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2498 : Tensor = aten::matmul(%query.43, %2497), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores.21 : Tensor = aten::div(%2498, %2460), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %2500 : Tensor = aten::eq(%mask, %2466), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %2500 : Tensor = aten::eq(%mask.1, %2466), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ? ++ | |
| %input.203 : Tensor = aten::masked_fill(%scores.21, %2500, %2461), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.205 : Tensor = aten::softmax(%input.203, %2467, %2469), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn.21 : Tensor = aten::dropout(%input.205, %2462, %2468), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention/__module.model.transformer_blocks.10.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.63 : Tensor = aten::matmul(%p_attn.21, %value.21), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %2505 : Tensor = aten::transpose(%x.63, %2470, %2463), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2506 : Tensor = aten::contiguous(%2505, %2466), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2507 : int[] = prim::ListConstruct(%2484, %2467, %2458), scope: __module.model.transformer_blocks.10.input_sublayer | |
| %input.207 : Tensor = aten::view(%2506, %2507), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.209 : Tensor = aten::linear(%input.207, %weight.91, %bias.87), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2510 : Tensor = aten::dropout(%input.209, %2462, %2468), scope: __module.model.transformer_blocks.10.input_sublayer/__module.model.transformer_blocks.10.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.65 : Tensor = aten::add(%x.61, %2510, %2470), scope: __module.model.transformer_blocks.10.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2512 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2513 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %2514 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2515 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2516 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm | |
| %2517 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2518 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.129 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer.21) | |
| %norm.43 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer.21) | |
| %b_2.43 : Tensor = prim::GetAttr[name="b_2"](%norm.43) | |
| %a_2.43 : Tensor = prim::GetAttr[name="a_2"](%norm.43) | |
| %2523 : int[] = prim::ListConstruct(%2514), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm | |
| %mean.43 : Tensor = aten::mean(%x.65, %2523, %2515, %2516), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2525 : int[] = prim::ListConstruct(%2514), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm | |
| %std.43 : Tensor = aten::std(%x.65, %2525, %2515, %2515), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2527 : Tensor = aten::sub(%x.65, %mean.43, %2517), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2528 : Tensor = aten::mul(%a_2.43, %2527), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2529 : Tensor = aten::add(%std.43, %2518, %2517), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2530 : Tensor = aten::div(%2528, %2529), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.211 : Tensor = aten::add(%2530, %b_2.43, %2517), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.137 : Tensor = prim::GetAttr[name="bias"](%w_1.21) | |
| %weight.145 : Tensor = prim::GetAttr[name="weight"](%w_1.21) | |
| %input.213 : Tensor = aten::linear(%input.211, %weight.145, %bias.137), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.215 : Tensor = aten::gelu(%input.213, %2513), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.217 : Tensor = aten::dropout(%input.215, %2512, %2515), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias.139 : Tensor = prim::GetAttr[name="bias"](%w_2.21) | |
| %weight.147 : Tensor = prim::GetAttr[name="weight"](%w_2.21) | |
| %input.219 : Tensor = aten::linear(%input.217, %weight.147, %bias.139), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2540 : Tensor = aten::dropout(%input.219, %2512, %2515), scope: __module.model.transformer_blocks.10.output_sublayer/__module.model.transformer_blocks.10.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input.221 : Tensor = aten::add(%x.65, %2540, %2517), scope: __module.model.transformer_blocks.10.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2542 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.10.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2543 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.10.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.67 : Tensor = aten::dropout(%input.221, %2543, %2542), scope: __module.model.transformer_blocks.10.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2545 : int = prim::Constant[value=768](), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2546 : int = prim::Constant[value=-2](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2547 : Tensor = prim::Constant[value={8}](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2548 : float = prim::Constant[value=-1000000000.](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %2549 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention/__module.model.transformer_blocks.11.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2550 : int = prim::Constant[value=2](), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2551 : int = prim::Constant[value=64](), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2552 : int = prim::Constant[value=12](), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2553 : int = prim::Constant[value=0](), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2554 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2555 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2556 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm | |
| %2557 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2558 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %norm.45 : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%input_sublayer) | |
| %b_2.45 : Tensor = prim::GetAttr[name="b_2"](%norm.45) | |
| %a_2.45 : Tensor = prim::GetAttr[name="a_2"](%norm.45) | |
| %2562 : int[] = prim::ListConstruct(%2554), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm | |
| %mean.45 : Tensor = aten::mean(%x.67, %2562, %2555, %2556), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2564 : int[] = prim::ListConstruct(%2554), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm | |
| %std.45 : Tensor = aten::std(%x.67, %2564, %2555, %2555), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2566 : Tensor = aten::sub(%x.67, %mean.45, %2557), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2567 : Tensor = aten::mul(%a_2.45, %2566), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2568 : Tensor = aten::add(%std.45, %2558, %2557), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2569 : Tensor = aten::div(%2567, %2568), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %query.45 : Tensor = aten::add(%2569, %b_2.45, %2557), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2571 : int = aten::size(%query.45, %2553), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:34:0 | |
| %2572 : Tensor = aten::linear(%query.45, %weight.93, %bias.89), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.linear_layers.0 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2573 : int[] = prim::ListConstruct(%2571, %2554, %2552, %2551), scope: __module.model.transformer_blocks.11.input_sublayer | |
| %2574 : Tensor = aten::view(%2572, %2573), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %query : Tensor = aten::transpose(%2574, %2557, %2550), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2576 : Tensor = aten::linear(%query.45, %weight.95, %bias.91), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.linear_layers.1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2577 : int[] = prim::ListConstruct(%2571, %2554, %2552, %2551), scope: __module.model.transformer_blocks.11.input_sublayer | |
| %2578 : Tensor = aten::view(%2576, %2577), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %key : Tensor = aten::transpose(%2578, %2557, %2550), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2580 : Tensor = aten::linear(%query.45, %weight.97, %bias.93), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.linear_layers.2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2581 : int[] = prim::ListConstruct(%2571, %2554, %2552, %2551), scope: __module.model.transformer_blocks.11.input_sublayer | |
| %2582 : Tensor = aten::view(%2580, %2581), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %value : Tensor = aten::transpose(%2582, %2557, %2550), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:37:0 | |
| %2584 : Tensor = aten::transpose(%key, %2546, %2554), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %2585 : Tensor = aten::matmul(%query, %2584), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| %scores : Tensor = aten::div(%2585, %2547), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:15:0 | |
| - %2587 : Tensor = aten::eq(%mask, %2553), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| + %2587 : Tensor = aten::eq(%mask.1, %2553), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| ? ++ | |
| %input.223 : Tensor = aten::masked_fill(%scores, %2587, %2548), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:27:0 | |
| %input.225 : Tensor = aten::softmax(%input.223, %2554, %2556), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1783:0 | |
| %p_attn : Tensor = aten::dropout(%input.225, %2549, %2555), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention/__module.model.transformer_blocks.11.lambda_module.attention.dropout.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x.69 : Tensor = aten::matmul(%p_attn, %value), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.attention # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/single.py:33:0 | |
| %2592 : Tensor = aten::transpose(%x.69, %2557, %2550), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2593 : Tensor = aten::contiguous(%2592, %2553), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %2594 : int[] = prim::ListConstruct(%2571, %2554, %2545), scope: __module.model.transformer_blocks.11.input_sublayer | |
| %input.227 : Tensor = aten::view(%2593, %2594), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/attention/multi_head.py:44:0 | |
| %input.229 : Tensor = aten::linear(%input.227, %weight.99, %bias.95), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.lambda_module.attention.output_linear # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2597 : Tensor = aten::dropout(%input.229, %2549, %2555), scope: __module.model.transformer_blocks.11.input_sublayer/__module.model.transformer_blocks.11.input_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %x : Tensor = aten::add(%x.67, %2597, %2557), scope: __module.model.transformer_blocks.11.input_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2599 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2600 : str = prim::Constant[value="none"](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %2601 : int = prim::Constant[value=-1](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2602 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2603 : NoneType = prim::Constant(), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm | |
| %2604 : int = prim::Constant[value=1](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2605 : Tensor = prim::Constant[value={1e-06}](), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %dropout.141 : __torch__.torch.nn.modules.dropout.Dropout = prim::GetAttr[name="dropout"](%output_sublayer) | |
| %norm : __torch__.bert_pytorch.model.utils.layer_norm.LayerNorm = prim::GetAttr[name="norm"](%output_sublayer) | |
| %b_2 : Tensor = prim::GetAttr[name="b_2"](%norm) | |
| %a_2 : Tensor = prim::GetAttr[name="a_2"](%norm) | |
| %2610 : int[] = prim::ListConstruct(%2601), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm | |
| %mean : Tensor = aten::mean(%x, %2610, %2602, %2603), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:15:0 | |
| %2612 : int[] = prim::ListConstruct(%2601), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm | |
| %std : Tensor = aten::std(%x, %2612, %2602, %2602), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:16:0 | |
| %2614 : Tensor = aten::sub(%x, %mean, %2604), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2615 : Tensor = aten::mul(%a_2, %2614), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2616 : Tensor = aten::add(%std, %2605, %2604), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %2617 : Tensor = aten::div(%2615, %2616), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %input.231 : Tensor = aten::add(%2617, %b_2, %2604), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.norm # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/layer_norm.py:17:0 | |
| %bias.141 : Tensor = prim::GetAttr[name="bias"](%w_1) | |
| %weight.149 : Tensor = prim::GetAttr[name="weight"](%w_1) | |
| %input.233 : Tensor = aten::linear(%input.231, %weight.149, %bias.141), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.w_1 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %input.235 : Tensor = aten::gelu(%input.233, %2600), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.activation # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/activation.py:681:0 | |
| %input.237 : Tensor = aten::dropout(%input.235, %2599, %2602), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %bias : Tensor = prim::GetAttr[name="bias"](%w_2) | |
| %weight : Tensor = prim::GetAttr[name="weight"](%w_2) | |
| %input.239 : Tensor = aten::linear(%input.237, %weight, %bias), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.feed_forward.w_2 # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/modules/linear.py:103:0 | |
| %2627 : Tensor = aten::dropout(%input.239, %2599, %2602), scope: __module.model.transformer_blocks.11.output_sublayer/__module.model.transformer_blocks.11.output_sublayer.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %input : Tensor = aten::add(%x, %2627, %2604), scope: __module.model.transformer_blocks.11.output_sublayer # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/bert_pytorch/model/utils/sublayer.py:19:0 | |
| %2629 : bool = prim::Constant[value=1](), scope: __module.model.transformer_blocks.11.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2630 : float = prim::Constant[value=0.10000000000000001](), scope: __module.model.transformer_blocks.11.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| %2631 : Tensor = aten::dropout(%input, %2630, %2629), scope: __module.model.transformer_blocks.11.dropout # /home/prashant/dSHARK/shark.venv/lib/python3.9/site-packages/torch/nn/functional.py:1243:0 | |
| return (%2631) | |
| First diverging operator: | |
| Node diff: | |
| - %model : __torch__.bert_pytorch.model.bert.BERT = prim::GetAttr[name="model"](%self.1) | |
| + %model : __torch__.bert_pytorch.model.bert.___torch_mangle_764.BERT = prim::GetAttr[name="model"](%self.1) | |
| ? ++++++++++++++++++++ | |